npm - react-native-sherpa-onnx - Versions diffs - 0.2.0 → 0.3.0 - Mend

react-native-sherpa-onnx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (175) hide show

package/README.md +232 -236
package/SherpaOnnx.podspec +68 -64
package/android/build.gradle +182 -192
package/android/codegen.gradle +57 -0
package/android/prebuilt-download.gradle +428 -0
package/android/prebuilt-versions.gradle +43 -0
package/android/proguard-rules.pro +10 -0
package/android/src/main/assets/testModels/add_mul_add.onnx +28 -0
package/android/src/main/assets/testModels/nnapi_internal_uint8_support.onnx +0 -0
package/android/src/main/assets/testModels/qnn_multi_ctx_embed.onnx +0 -0
package/android/src/main/cpp/CMakeLists.txt +166 -129
package/android/src/main/cpp/CMakePresets.json +54 -0
package/android/src/main/cpp/crypto/sha256.cpp +174 -0
package/android/src/main/cpp/crypto/sha256.h +16 -0
package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.cpp +404 -0
package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-helper.h +56 -0
package/android/src/main/cpp/jni/archive/sherpa-onnx-archive-jni.cpp +181 -0
package/android/src/main/cpp/jni/audio/sherpa-onnx-audio-convert-jni.cpp +888 -0
package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-common.h +18 -18
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.cpp +86 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-detect-jni-common.h +20 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.cpp +423 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-helper.h +55 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-stt.cpp +399 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-model-detect-tts.cpp +238 -0
package/{ios → android/src/main/cpp/jni/model_detect}/sherpa-onnx-model-detect.h +122 -89
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.cpp +99 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-stt-wrapper.h +16 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.cpp +78 -0
package/android/src/main/cpp/jni/model_detect/sherpa-onnx-tts-wrapper.h +16 -0
package/android/src/main/cpp/jni/module/sherpa-onnx-module-jni.cpp +190 -0
package/android/src/main/cpp/jni/tts/sherpa-onnx-tts-zipvoice-jni.cpp +301 -0
package/android/src/main/java/com/sherpaonnx/SherpaOnnxArchiveHelper.kt +94 -0
package/android/src/main/java/com/sherpaonnx/{SherpaOnnxCoreHelper.kt → SherpaOnnxAssetHelper.kt} +350 -236
package/android/src/main/java/com/sherpaonnx/SherpaOnnxModule.kt +791 -483
package/android/src/main/java/com/sherpaonnx/SherpaOnnxSttHelper.kt +699 -109
package/android/src/main/java/com/sherpaonnx/SherpaOnnxTtsHelper.kt +1123 -668
package/android/src/main/java/com/sherpaonnx/ZipvoiceTtsWrapper.kt +187 -0
package/ios/SherpaOnnx+Assets.h +11 -0
package/ios/SherpaOnnx+Assets.mm +325 -0
package/ios/SherpaOnnx+STT.mm +455 -118
package/ios/SherpaOnnx+TTS.mm +1101 -712
package/ios/SherpaOnnx.h +17 -6
package/ios/SherpaOnnx.mm +206 -311
package/ios/SherpaOnnx.xcconfig +19 -19
package/ios/SherpaOnnxCoreMLHelper.swift +24 -0
package/ios/archive/sherpa-onnx-archive-helper.h +21 -0
package/ios/archive/sherpa-onnx-archive-helper.mm +296 -0
package/ios/libarchive_darwin_config.h +153 -0
package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-common.h +18 -18
package/ios/model_detect/sherpa-onnx-model-detect-helper.h +49 -0
package/ios/model_detect/sherpa-onnx-model-detect-helper.mm +210 -0
package/ios/model_detect/sherpa-onnx-model-detect-stt.mm +344 -0
package/ios/model_detect/sherpa-onnx-model-detect-tts.mm +201 -0
package/{android/src/main/cpp/jni → ios/model_detect}/sherpa-onnx-model-detect.h +117 -89
package/ios/scripts/patch-libarchive-includes.sh +61 -0
package/ios/scripts/setup-ios-libarchive.sh +98 -0
package/ios/stt/sherpa-onnx-stt-wrapper.h +129 -0
package/ios/stt/sherpa-onnx-stt-wrapper.mm +523 -0
package/ios/{sherpa-onnx-tts-wrapper.h → tts/sherpa-onnx-tts-wrapper.h} +90 -85
package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} +376 -345
package/lib/module/NativeSherpaOnnx.js +3 -0
package/lib/module/NativeSherpaOnnx.js.map +1 -1
package/lib/module/audio/index.js +22 -0
package/lib/module/audio/index.js.map +1 -0
package/lib/module/diarization/index.js +1 -1
package/lib/module/diarization/index.js.map +1 -1
package/lib/module/download/ModelDownloadManager.js +918 -0
package/lib/module/download/ModelDownloadManager.js.map +1 -0
package/lib/module/download/extractTarBz2.js +53 -0
package/lib/module/download/extractTarBz2.js.map +1 -0
package/lib/module/download/index.js +6 -0
package/lib/module/download/index.js.map +1 -0
package/lib/module/download/validation.js +178 -0
package/lib/module/download/validation.js.map +1 -0
package/lib/module/enhancement/index.js +1 -1
package/lib/module/enhancement/index.js.map +1 -1
package/lib/module/index.js +41 -3
package/lib/module/index.js.map +1 -1
package/lib/module/separation/index.js +1 -1
package/lib/module/separation/index.js.map +1 -1
package/lib/module/stt/index.js +127 -60
package/lib/module/stt/index.js.map +1 -1
package/lib/module/stt/sttModelLanguages.js +512 -0
package/lib/module/stt/sttModelLanguages.js.map +1 -0
package/lib/module/stt/types.js +53 -1
package/lib/module/stt/types.js.map +1 -1
package/lib/module/tts/index.js +216 -289
package/lib/module/tts/index.js.map +1 -1
package/lib/module/tts/types.js +86 -1
package/lib/module/tts/types.js.map +1 -1
package/lib/module/types.js.map +1 -1
package/lib/module/utils.js +86 -73
package/lib/module/utils.js.map +1 -1
package/lib/module/vad/index.js +1 -1
package/lib/module/vad/index.js.map +1 -1
package/lib/typescript/src/NativeSherpaOnnx.d.ts +192 -38
package/lib/typescript/src/NativeSherpaOnnx.d.ts.map +1 -1
package/lib/typescript/src/audio/index.d.ts +13 -0
package/lib/typescript/src/audio/index.d.ts.map +1 -0
package/lib/typescript/src/diarization/index.d.ts +3 -2
package/lib/typescript/src/diarization/index.d.ts.map +1 -1
package/lib/typescript/src/download/ModelDownloadManager.d.ts +108 -0
package/lib/typescript/src/download/ModelDownloadManager.d.ts.map +1 -0
package/lib/typescript/src/download/extractTarBz2.d.ts +14 -0
package/lib/typescript/src/download/extractTarBz2.d.ts.map +1 -0
package/lib/typescript/src/download/index.d.ts +7 -0
package/lib/typescript/src/download/index.d.ts.map +1 -0
package/lib/typescript/src/download/validation.d.ts +57 -0
package/lib/typescript/src/download/validation.d.ts.map +1 -0
package/lib/typescript/src/enhancement/index.d.ts +3 -2
package/lib/typescript/src/enhancement/index.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +26 -2
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/separation/index.d.ts +3 -2
package/lib/typescript/src/separation/index.d.ts.map +1 -1
package/lib/typescript/src/stt/index.d.ts +31 -43
package/lib/typescript/src/stt/index.d.ts.map +1 -1
package/lib/typescript/src/stt/sttModelLanguages.d.ts +52 -0
package/lib/typescript/src/stt/sttModelLanguages.d.ts.map +1 -0
package/lib/typescript/src/stt/types.d.ts +196 -9
package/lib/typescript/src/stt/types.d.ts.map +1 -1
package/lib/typescript/src/tts/index.d.ts +25 -211
package/lib/typescript/src/tts/index.d.ts.map +1 -1
package/lib/typescript/src/tts/types.d.ts +148 -25
package/lib/typescript/src/tts/types.d.ts.map +1 -1
package/lib/typescript/src/types.d.ts +0 -32
package/lib/typescript/src/types.d.ts.map +1 -1
package/lib/typescript/src/utils.d.ts +28 -13
package/lib/typescript/src/utils.d.ts.map +1 -1
package/lib/typescript/src/vad/index.d.ts +3 -2
package/lib/typescript/src/vad/index.d.ts.map +1 -1
package/package.json +250 -222
package/scripts/check-qnn-support.sh +78 -0
package/scripts/setup-ios-framework.sh +379 -282
package/src/NativeSherpaOnnx.ts +474 -251
package/src/audio/index.ts +32 -0
package/src/diarization/index.ts +4 -2
package/src/download/ModelDownloadManager.ts +1325 -0
package/src/download/extractTarBz2.ts +78 -0
package/src/download/index.ts +43 -0
package/src/download/validation.ts +279 -0
package/src/enhancement/index.ts +4 -2
package/src/index.tsx +78 -27
package/src/separation/index.ts +4 -2
package/src/stt/index.ts +249 -89
package/src/stt/sttModelLanguages.ts +237 -0
package/src/stt/types.ts +263 -9
package/src/tts/index.ts +470 -458
package/src/tts/types.ts +373 -218
package/src/types.ts +0 -44
package/src/utils.ts +145 -131
package/src/vad/index.ts +4 -2
package/third_party/ffmpeg_prebuilt/ANDROID_RELEASE_TAG +1 -0
package/third_party/libarchive_prebuilt/ANDROID_RELEASE_TAG +1 -0
package/third_party/libarchive_prebuilt/IOS_RELEASE_TAG +1 -0
package/third_party/sherpa-onnx-prebuilt/ANDROID_RELEASE_TAG +1 -0
package/third_party/sherpa-onnx-prebuilt/IOS_RELEASE_TAG +1 -0
package/android/src/main/cpp/include/sherpa-onnx/c-api/c-api.h +0 -1918
package/android/src/main/cpp/include/sherpa-onnx/c-api/cxx-api.h +0 -841
package/android/src/main/cpp/jni/sherpa-onnx-model-detect.cpp +0 -541
package/android/src/main/cpp/jni/sherpa-onnx-stt-jni.cpp +0 -336
package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.cpp +0 -222
package/android/src/main/cpp/jni/sherpa-onnx-stt-wrapper.h +0 -68
package/android/src/main/cpp/jni/sherpa-onnx-tts-jni.cpp +0 -823
package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.cpp +0 -387
package/android/src/main/cpp/jni/sherpa-onnx-tts-wrapper.h +0 -147
package/ios/Frameworks/sherpa_onnx.xcframework.zip +0 -0
package/ios/include/sherpa-onnx/c-api/c-api.h +0 -1918
package/ios/include/sherpa-onnx/c-api/cxx-api.h +0 -841
package/ios/sherpa-onnx-model-detect.mm +0 -441
package/ios/sherpa-onnx-stt-wrapper.h +0 -48
package/ios/sherpa-onnx-stt-wrapper.mm +0 -201
package/scripts/copy-headers.js +0 -184
package/scripts/setup-assets.js +0 -323

package/ios/{sherpa-onnx-tts-wrapper.mm → tts/sherpa-onnx-tts-wrapper.mm} RENAMED Viewed

@@ -1,345 +1,376 @@
-#include "sherpa-onnx-tts-wrapper.h"
-#include "sherpa-onnx-model-detect.h"
-#include <algorithm>
-#include <cctype>
-#include <cstring>
-#include <fstream>
-#include <optional>
-#include <sstream>
-// iOS logging
-#ifdef __APPLE__
-#include <Foundation/Foundation.h>
-#include <cstdio>
-#define LOGI(fmt, ...) NSLog(@"TtsWrapper: " fmt, ##__VA_ARGS__)
-#define LOGE(fmt, ...) NSLog(@"TtsWrapper ERROR: " fmt, ##__VA_ARGS__)
-#else
-#define LOGI(...)
-#define LOGE(...)
-#endif
-// Use C++17 filesystem (podspec enforces C++17)
-#include <filesystem>
-namespace fs = std::filesystem;
-// sherpa-onnx headers - use C++ API (RAII wrapper around C API)
-#include "sherpa-onnx/c-api/cxx-api.h"
-namespace sherpaonnx {
-class TtsWrapper::Impl {
-public:
-    bool initialized = false;
-    std::string modelDir;
-    std::optional<sherpa_onnx::cxx::OfflineTts> tts;
-};
-TtsWrapper::TtsWrapper() : pImpl(std::make_unique<Impl>()) {
-    LOGI("TtsWrapper created");
-}
-TtsWrapper::~TtsWrapper() {
-    release();
-    LOGI("TtsWrapper destroyed");
-}
-TtsInitializeResult TtsWrapper::initialize(
-    const std::string& modelDir,
-    const std::string& modelType,
-    int32_t numThreads,
-    bool debug,
-    const std::optional<float>& noiseScale,
-    const std::optional<float>& noiseScaleW,
-    const std::optional<float>& lengthScale
-) {
-    TtsInitializeResult result;
-    result.success = false;
-    if (pImpl->initialized) {
-        release();
-    }
-    if (modelDir.empty()) {
-        LOGE("TTS: Model directory is empty");
-        return result;
-    }
-    try {
-        sherpa_onnx::cxx::OfflineTtsConfig config;
-        config.model.num_threads = numThreads;
-        config.model.debug = debug;
-        auto detect = DetectTtsModel(modelDir, modelType);
-        if (!detect.ok) {
-            LOGE("%s", detect.error.c_str());
-            return result;
-        }
-        switch (detect.selectedKind) {
-            case TtsModelKind::kVits:
-                config.model.vits.model = detect.paths.ttsModel;
-                config.model.vits.tokens = detect.paths.tokens;
-                config.model.vits.data_dir = detect.paths.dataDir;
-                if (noiseScale.has_value()) {
-                    config.model.vits.noise_scale = *noiseScale;
-                }
-                if (noiseScaleW.has_value()) {
-                    config.model.vits.noise_scale_w = *noiseScaleW;
-                }
-                if (lengthScale.has_value()) {
-                    config.model.vits.length_scale = *lengthScale;
-                }
-                break;
-            case TtsModelKind::kMatcha:
-                config.model.matcha.acoustic_model = detect.paths.acousticModel;
-                config.model.matcha.vocoder = detect.paths.vocoder;
-                config.model.matcha.tokens = detect.paths.tokens;
-                config.model.matcha.data_dir = detect.paths.dataDir;
-                if (noiseScale.has_value()) {
-                    config.model.matcha.noise_scale = *noiseScale;
-                }
-                if (lengthScale.has_value()) {
-                    config.model.matcha.length_scale = *lengthScale;
-                }
-                break;
-            case TtsModelKind::kKokoro:
-                config.model.kokoro.model = detect.paths.ttsModel;
-                config.model.kokoro.tokens = detect.paths.tokens;
-                config.model.kokoro.data_dir = detect.paths.dataDir;
-                config.model.kokoro.voices = detect.paths.voices;
-                if (!detect.paths.lexicon.empty()) {
-                    config.model.kokoro.lexicon = detect.paths.lexicon;
-                }
-                if (lengthScale.has_value()) {
-                    config.model.kokoro.length_scale = *lengthScale;
-                }
-                break;
-            case TtsModelKind::kKitten:
-                config.model.kitten.model = detect.paths.ttsModel;
-                config.model.kitten.tokens = detect.paths.tokens;
-                config.model.kitten.data_dir = detect.paths.dataDir;
-                config.model.kitten.voices = detect.paths.voices;
-                if (lengthScale.has_value()) {
-                    config.model.kitten.length_scale = *lengthScale;
-                }
-                break;
-            case TtsModelKind::kZipvoice:
-                config.model.zipvoice.encoder = detect.paths.encoder;
-                config.model.zipvoice.decoder = detect.paths.decoder;
-                config.model.zipvoice.vocoder = detect.paths.vocoder;
-                config.model.zipvoice.tokens = detect.paths.tokens;
-                config.model.zipvoice.data_dir = detect.paths.dataDir;
-                break;
-            case TtsModelKind::kUnknown:
-            default:
-                LOGE("TTS: Unknown model type: %s", modelType.c_str());
-                return result;
-        }
-        LOGI("TTS: Creating OfflineTts instance...");
-        pImpl->tts = sherpa_onnx::cxx::OfflineTts::Create(config);
-        if (!pImpl->tts.has_value()) {
-            LOGE("TTS: Failed to create OfflineTts instance");
-            return result;
-        }
-        pImpl->initialized = true;
-        pImpl->modelDir = modelDir;
-        LOGI("TTS: Initialization successful");
-        LOGI("TTS: Sample rate: %d Hz", pImpl->tts.value().SampleRate());
-        LOGI("TTS: Number of speakers: %d", pImpl->tts.value().NumSpeakers());
-        result.success = true;
-        result.detectedModels = detect.detectedModels;
-        return result;
-    } catch (const std::exception& e) {
-        LOGE("TTS: Exception during initialization: %s", e.what());
-        return result;
-    } catch (...) {
-        LOGE("TTS: Unknown exception during initialization");
-        return result;
-    }
-}
-TtsWrapper::AudioResult TtsWrapper::generate(
-    const std::string& text,
-    int32_t sid,
-    float speed
-) {
-    AudioResult result;
-    result.sampleRate = 0;
-    if (!pImpl->initialized || !pImpl->tts.has_value()) {
-        LOGE("TTS: Not initialized. Call initialize() first.");
-        return result;
-    }
-    if (text.empty()) {
-        LOGE("TTS: Input text is empty");
-        return result;
-    }
-    try {
-        LOGI("TTS: Generating speech for text: %s (sid=%d, speed=%.2f)",
-             text.c_str(), sid, speed);
-        auto audio = pImpl->tts.value().Generate(text, sid, speed);
-        result.samples = std::move(audio.samples);
-        result.sampleRate = audio.sample_rate;
-        LOGI("TTS: Generated %zu samples at %d Hz",
-             result.samples.size(), result.sampleRate);
-        return result;
-    } catch (const std::exception& e) {
-        LOGE("TTS: Exception during generation: %s", e.what());
-        return result;
-    } catch (...) {
-        LOGE("TTS: Unknown exception during generation");
-        return result;
-    }
-}
-bool TtsWrapper::generateStream(
-    const std::string& text,
-    int32_t sid,
-    float speed,
-    const TtsStreamCallback& callback
-) {
-    if (!pImpl->initialized || !pImpl->tts.has_value()) {
-        LOGE("TTS: Not initialized. Call initialize() first.");
-        return false;
-    }
-    if (text.empty()) {
-        LOGE("TTS: Input text is empty");
-        return false;
-    }
-    try {
-        LOGI("TTS: Streaming generation for text: %s (sid=%d, speed=%.2f)",
-             text.c_str(), sid, speed);
-        auto callbackCopy = callback;
-        auto shim = [](const float *samples, int32_t numSamples, float progress, void *arg) -> int32_t {
-            auto *cb = reinterpret_cast<TtsStreamCallback*>(arg);
-            if (!cb || !(*cb)) return 0;
-            return (*cb)(samples, numSamples, progress);
-        };
-        pImpl->tts.value().Generate(
-            text,
-            sid,
-            speed,
-            callbackCopy ? shim : nullptr,
-            callbackCopy ? &callbackCopy : nullptr
-        );
-        return true;
-    } catch (const std::exception& e) {
-        LOGE("TTS: Exception during streaming generation: %s", e.what());
-        return false;
-    } catch (...) {
-        LOGE("TTS: Unknown exception during streaming generation");
-        return false;
-    }
-}
-int32_t TtsWrapper::getSampleRate() const {
-    if (!pImpl->initialized || !pImpl->tts.has_value()) {
-        LOGE("TTS: Not initialized. Call initialize() first.");
-        return 0;
-    }
-    return pImpl->tts.value().SampleRate();
-}
-int32_t TtsWrapper::getNumSpeakers() const {
-    if (!pImpl->initialized || !pImpl->tts.has_value()) {
-        LOGE("TTS: Not initialized. Call initialize() first.");
-        return 0;
-    }
-    return pImpl->tts.value().NumSpeakers();
-}
-bool TtsWrapper::isInitialized() const {
-    return pImpl->initialized;
-}
-void TtsWrapper::release() {
-    if (pImpl->initialized) {
-        pImpl->tts.reset();
-        pImpl->initialized = false;
-        pImpl->modelDir.clear();
-        LOGI("TTS: Resources released");
-    }
-}
-bool TtsWrapper::saveToWavFile(
-    const std::vector<float>& samples,
-    int32_t sampleRate,
-    const std::string& filePath
-) {
-    if (samples.empty()) {
-        LOGE("TTS: Cannot save empty audio samples");
-        return false;
-    }
-    if (sampleRate <= 0) {
-        LOGE("TTS: Invalid sample rate: %d", sampleRate);
-        return false;
-    }
-    try {
-        std::ofstream outfile(filePath, std::ios::binary);
-        if (!outfile) {
-            LOGE("TTS: Failed to open output file: %s", filePath.c_str());
-            return false;
-        }
-        const int32_t numChannels = 1;
-        const int32_t bitsPerSample = 16;
-        const int32_t byteRate = sampleRate * numChannels * bitsPerSample / 8;
-        const int32_t blockAlign = numChannels * bitsPerSample / 8;
-        const int32_t dataSize = static_cast<int32_t>(samples.size()) * bitsPerSample / 8;
-        const int32_t chunkSize = 36 + dataSize;
-        outfile.write("RIFF", 4);
-        outfile.write(reinterpret_cast<const char*>(&chunkSize), 4);
-        outfile.write("WAVE", 4);
-        outfile.write("fmt ", 4);
-        const int32_t subchunk1Size = 16;
-        outfile.write(reinterpret_cast<const char*>(&subchunk1Size), 4);
-        const int16_t audioFormat = 1;
-        outfile.write(reinterpret_cast<const char*>(&audioFormat), 2);
-        const int16_t numChannelsInt16 = static_cast<int16_t>(numChannels);
-        outfile.write(reinterpret_cast<const char*>(&numChannelsInt16), 2);
-        outfile.write(reinterpret_cast<const char*>(&sampleRate), 4);
-        outfile.write(reinterpret_cast<const char*>(&byteRate), 4);
-        const int16_t blockAlignInt16 = static_cast<int16_t>(blockAlign);
-        outfile.write(reinterpret_cast<const char*>(&blockAlignInt16), 2);
-        const int16_t bitsPerSampleInt16 = static_cast<int16_t>(bitsPerSample);
-        outfile.write(reinterpret_cast<const char*>(&bitsPerSampleInt16), 2);
-        outfile.write("data", 4);
-        outfile.write(reinterpret_cast<const char*>(&dataSize), 4);
-        for (float sample : samples) {
-            float clamped = std::max(-1.0f, std::min(1.0f, sample));
-            int16_t intSample = static_cast<int16_t>(clamped * 32767.0f);
-            outfile.write(reinterpret_cast<const char*>(&intSample), sizeof(int16_t));
-        }
-        outfile.close();
-        LOGI("TTS: Successfully saved %zu samples to %s", samples.size(), filePath.c_str());
-        return true;
-    } catch (const std::exception& e) {
-        LOGE("TTS: Exception while saving WAV file: %s", e.what());
-        return false;
-    }
-}
-} // namespace sherpaonnx
+/**
+ * sherpa-onnx-tts-wrapper.mm
+ *
+ * Purpose: Wraps the sherpa-onnx C++ OfflineTts for iOS. Builds config from TtsModelPaths, creates
+ * TTS instance, generates audio from text. Used by SherpaOnnx+TTS.mm.
+ */
+#include "sherpa-onnx-tts-wrapper.h"
+#include "sherpa-onnx-model-detect.h"
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <fstream>
+#include <optional>
+#include <sstream>
+// iOS logging
+#ifdef __APPLE__
+#include <Foundation/Foundation.h>
+#include <cstdio>
+#define LOGI(fmt, ...) NSLog(@"TtsWrapper: " fmt, ##__VA_ARGS__)
+#define LOGE(fmt, ...) NSLog(@"TtsWrapper ERROR: " fmt, ##__VA_ARGS__)
+#else
+#define LOGI(...)
+#define LOGE(...)
+#endif
+// Use C++17 filesystem (podspec enforces C++17)
+#include <filesystem>
+namespace fs = std::filesystem;
+// sherpa-onnx headers - use C++ API (RAII wrapper around C API)
+#include "sherpa-onnx/c-api/cxx-api.h"
+namespace sherpaonnx {
+/**
+ * Private state behind TtsWrapper, owned through the wrapper's pImpl member.
+ */
+class TtsWrapper::Impl {
+public:
+    // Set to true at the end of a successful initialize(); set back to false by release().
+    bool initialized = false;
+    // Model directory given to the last successful initialize(); cleared by release().
+    std::string modelDir;
+    // The sherpa-onnx offline TTS engine; empty until initialize() assigns it, reset by release().
+    std::optional<sherpa_onnx::cxx::OfflineTts> tts;
+};
+/**
+ * Creates the wrapper with a fresh Impl whose `initialized` flag is false and whose engine is
+ * empty, then logs the creation. No model is loaded here.
+ */
+TtsWrapper::TtsWrapper() : pImpl(std::make_unique<Impl>()) {
+    LOGI("TtsWrapper created");
+}
+/**
+ * Calls release() so an initialized engine is dropped before the wrapper goes away, then logs
+ * the destruction.
+ */
+TtsWrapper::~TtsWrapper() {
+    release();
+    LOGI("TtsWrapper destroyed");
+}
+/**
+ * Detects the TTS model found in `modelDir`, builds the sherpa-onnx OfflineTts configuration for
+ * it and creates the engine. A previously initialized engine is released first.
+ *
+ * @param modelDir        Directory containing the model files; must not be empty.
+ * @param modelType       Model type hint forwarded to DetectTtsModel().
+ * @param numThreads      Copied to config.model.num_threads.
+ * @param debug           Copied to config.model.debug.
+ * @param noiseScale      Optional; applied to VITS and Matcha models only.
+ * @param noiseScaleW     Optional; applied to VITS models only.
+ * @param lengthScale     Optional; applied to VITS, Matcha, Kokoro and Kitten models.
+ * @param ruleFsts        Optional; copied to config.rule_fsts when non-empty.
+ * @param ruleFars        Optional; copied to config.rule_fars when non-empty.
+ * @param maxNumSentences Optional; copied to config.max_num_sentences when >= 1.
+ * @param silenceScale    Optional; copied to config.silence_scale when present.
+ * @param provider        Optional; copied to config.model.provider when non-empty.
+ * @return A result whose `success` is true (with `detectedModels` filled from detection) only
+ *         when the engine was actually created; otherwise `success` is false. Failures are
+ *         logged and exceptions are caught here, never propagated.
+ */
+TtsInitializeResult TtsWrapper::initialize(
+    const std::string& modelDir,
+    const std::string& modelType,
+    int32_t numThreads,
+    bool debug,
+    const std::optional<float>& noiseScale,
+    const std::optional<float>& noiseScaleW,
+    const std::optional<float>& lengthScale,
+    const std::optional<std::string>& ruleFsts,
+    const std::optional<std::string>& ruleFars,
+    const std::optional<int32_t>& maxNumSentences,
+    const std::optional<float>& silenceScale,
+    const std::optional<std::string>& provider
+) {
+    TtsInitializeResult result;
+    result.success = false;
+    // Re-initialization: drop the previous engine before building a new one.
+    if (pImpl->initialized) {
+        release();
+    }
+    if (modelDir.empty()) {
+        LOGE("TTS: Model directory is empty");
+        return result;
+    }
+    try {
+        sherpa_onnx::cxx::OfflineTtsConfig config;
+        config.model.num_threads = numThreads;
+        config.model.debug = debug;
+        if (provider.has_value() && !provider->empty()) {
+            config.model.provider = *provider;
+        }
+        auto detect = DetectTtsModel(modelDir, modelType);
+        if (!detect.ok) {
+            LOGE("%s", detect.error.c_str());
+            return result;
+        }
+        // Copy the detected file paths (and the tuning values each family accepts) into the
+        // matching sub-config.
+        switch (detect.selectedKind) {
+            case TtsModelKind::kVits:
+                config.model.vits.model = detect.paths.ttsModel;
+                config.model.vits.tokens = detect.paths.tokens;
+                config.model.vits.data_dir = detect.paths.dataDir;
+                if (noiseScale.has_value()) {
+                    config.model.vits.noise_scale = *noiseScale;
+                }
+                if (noiseScaleW.has_value()) {
+                    config.model.vits.noise_scale_w = *noiseScaleW;
+                }
+                if (lengthScale.has_value()) {
+                    config.model.vits.length_scale = *lengthScale;
+                }
+                break;
+            case TtsModelKind::kMatcha:
+                config.model.matcha.acoustic_model = detect.paths.acousticModel;
+                config.model.matcha.vocoder = detect.paths.vocoder;
+                config.model.matcha.tokens = detect.paths.tokens;
+                config.model.matcha.data_dir = detect.paths.dataDir;
+                if (noiseScale.has_value()) {
+                    config.model.matcha.noise_scale = *noiseScale;
+                }
+                if (lengthScale.has_value()) {
+                    config.model.matcha.length_scale = *lengthScale;
+                }
+                break;
+            case TtsModelKind::kKokoro:
+                config.model.kokoro.model = detect.paths.ttsModel;
+                config.model.kokoro.tokens = detect.paths.tokens;
+                config.model.kokoro.data_dir = detect.paths.dataDir;
+                config.model.kokoro.voices = detect.paths.voices;
+                if (!detect.paths.lexicon.empty()) {
+                    config.model.kokoro.lexicon = detect.paths.lexicon;
+                }
+                if (lengthScale.has_value()) {
+                    config.model.kokoro.length_scale = *lengthScale;
+                }
+                break;
+            case TtsModelKind::kKitten:
+                config.model.kitten.model = detect.paths.ttsModel;
+                config.model.kitten.tokens = detect.paths.tokens;
+                config.model.kitten.data_dir = detect.paths.dataDir;
+                config.model.kitten.voices = detect.paths.voices;
+                if (lengthScale.has_value()) {
+                    config.model.kitten.length_scale = *lengthScale;
+                }
+                break;
+            case TtsModelKind::kZipvoice:
+                config.model.zipvoice.encoder = detect.paths.encoder;
+                config.model.zipvoice.decoder = detect.paths.decoder;
+                config.model.zipvoice.vocoder = detect.paths.vocoder;
+                config.model.zipvoice.tokens = detect.paths.tokens;
+                config.model.zipvoice.data_dir = detect.paths.dataDir;
+                break;
+            case TtsModelKind::kPocket:
+                LOGE("TTS: Pocket model type is detected but not yet supported on iOS");
+                return result;
+            case TtsModelKind::kUnknown:
+            default:
+                LOGE("TTS: Unknown model type: %s", modelType.c_str());
+                return result;
+        }
+        // Model-independent options: only override the library defaults when the caller
+        // supplied a usable value.
+        if (ruleFsts.has_value() && !ruleFsts->empty()) {
+            config.rule_fsts = *ruleFsts;
+        }
+        if (ruleFars.has_value() && !ruleFars->empty()) {
+            config.rule_fars = *ruleFars;
+        }
+        if (maxNumSentences.has_value() && *maxNumSentences >= 1) {
+            config.max_num_sentences = *maxNumSentences;
+        }
+        if (silenceScale.has_value()) {
+            config.silence_scale = *silenceScale;
+        }
+        LOGI("TTS: Creating OfflineTts instance...");
+        pImpl->tts = sherpa_onnx::cxx::OfflineTts::Create(config);
+        // Create() returns an OfflineTts by value, so the optional is engaged even when the
+        // native engine could not be built. The wrapped handle (Get()) is the real success
+        // indicator; without this check a failed creation was reported as success and the
+        // SampleRate()/NumSpeakers() calls below ran on a null handle.
+        if (!pImpl->tts.has_value() || pImpl->tts->Get() == nullptr) {
+            LOGE("TTS: Failed to create OfflineTts instance");
+            pImpl->tts.reset();
+            return result;
+        }
+        pImpl->initialized = true;
+        pImpl->modelDir = modelDir;
+        LOGI("TTS: Initialization successful");
+        LOGI("TTS: Sample rate: %d Hz", pImpl->tts.value().SampleRate());
+        LOGI("TTS: Number of speakers: %d", pImpl->tts.value().NumSpeakers());
+        result.success = true;
+        result.detectedModels = detect.detectedModels;
+        return result;
+    } catch (const std::exception& e) {
+        LOGE("TTS: Exception during initialization: %s", e.what());
+        return result;
+    } catch (...) {
+        LOGE("TTS: Unknown exception during initialization");
+        return result;
+    }
+}
+/**
+ * Synthesizes `text` in a single call.
+ *
+ * @param text  Text to speak; must not be empty.
+ * @param sid   Speaker id forwarded to the engine.
+ * @param speed Speed factor forwarded to the engine.
+ * @return The generated samples and their sample rate. When the wrapper is not initialized,
+ *         the text is empty, or the engine throws, the problem is logged and the result is
+ *         returned with sampleRate still 0.
+ */
+TtsWrapper::AudioResult TtsWrapper::generate(
+    const std::string& text,
+    int32_t sid,
+    float speed
+) {
+    AudioResult out;
+    out.sampleRate = 0;
+    const bool ready = pImpl->initialized && pImpl->tts.has_value();
+    if (!ready) {
+        LOGE("TTS: Not initialized. Call initialize() first.");
+        return out;
+    }
+    if (text.empty()) {
+        LOGE("TTS: Input text is empty");
+        return out;
+    }
+    try {
+        LOGI("TTS: Generating speech for text: %s (sid=%d, speed=%.2f)",
+             text.c_str(), sid, speed);
+        auto generated = pImpl->tts->Generate(text, sid, speed);
+        // Take ownership of the sample buffer instead of copying it.
+        out.samples = std::move(generated.samples);
+        out.sampleRate = generated.sample_rate;
+        LOGI("TTS: Generated %zu samples at %d Hz",
+             out.samples.size(), out.sampleRate);
+    } catch (const std::exception& e) {
+        LOGE("TTS: Exception during generation: %s", e.what());
+    } catch (...) {
+        LOGE("TTS: Unknown exception during generation");
+    }
+    return out;
+}
+/**
+ * Synthesizes `text` while handing audio chunks to `callback` as the engine produces them.
+ *
+ * @param text     Text to speak; must not be empty.
+ * @param sid      Speaker id forwarded to the engine.
+ * @param speed    Speed factor forwarded to the engine.
+ * @param callback Receives (samples, numSamples, progress); its return value is passed back to
+ *                 the engine. May be empty, in which case no callback is registered.
+ * @return true when the engine call completed without throwing; false when the wrapper is not
+ *         initialized, the text is empty, or an exception was caught.
+ */
+bool TtsWrapper::generateStream(
+    const std::string& text,
+    int32_t sid,
+    float speed,
+    const TtsStreamCallback& callback
+) {
+    const bool ready = pImpl->initialized && pImpl->tts.has_value();
+    if (!ready) {
+        LOGE("TTS: Not initialized. Call initialize() first.");
+        return false;
+    }
+    if (text.empty()) {
+        LOGE("TTS: Input text is empty");
+        return false;
+    }
+    bool completed = false;
+    try {
+        LOGI("TTS: Streaming generation for text: %s (sid=%d, speed=%.2f)",
+             text.c_str(), sid, speed);
+        // Local copy whose address is handed to the engine as the opaque user argument.
+        auto localCallback = callback;
+        // Captureless trampoline: recovers the callback object from `arg` and forwards the chunk.
+        auto trampoline = [](const float *samples, int32_t numSamples, float progress, void *arg) -> int32_t {
+            auto *fn = static_cast<TtsStreamCallback*>(arg);
+            if (fn == nullptr || !(*fn)) {
+                return 0;
+            }
+            return (*fn)(samples, numSamples, progress);
+        };
+        pImpl->tts->Generate(
+            text,
+            sid,
+            speed,
+            localCallback ? trampoline : nullptr,
+            localCallback ? &localCallback : nullptr
+        );
+        completed = true;
+    } catch (const std::exception& e) {
+        LOGE("TTS: Exception during streaming generation: %s", e.what());
+    } catch (...) {
+        LOGE("TTS: Unknown exception during streaming generation");
+    }
+    return completed;
+}
+/**
+ * Returns the engine's output sample rate, or 0 (after logging an error) when the wrapper has
+ * not been initialized.
+ */
+int32_t TtsWrapper::getSampleRate() const {
+    const bool ready = pImpl->initialized && pImpl->tts.has_value();
+    if (ready) {
+        return pImpl->tts->SampleRate();
+    }
+    LOGE("TTS: Not initialized. Call initialize() first.");
+    return 0;
+}
+/**
+ * Returns the number of speakers reported by the engine, or 0 (after logging an error) when the
+ * wrapper has not been initialized.
+ */
+int32_t TtsWrapper::getNumSpeakers() const {
+    const bool ready = pImpl->initialized && pImpl->tts.has_value();
+    if (ready) {
+        return pImpl->tts->NumSpeakers();
+    }
+    LOGE("TTS: Not initialized. Call initialize() first.");
+    return 0;
+}
+/** Reports the Impl's `initialized` flag: true after a successful initialize(), false after release(). */
+bool TtsWrapper::isInitialized() const {
+    return pImpl->initialized;
+}
+/**
+ * Drops the engine and clears the stored state. Does nothing when the wrapper is not
+ * initialized, so it can be called repeatedly.
+ */
+void TtsWrapper::release() {
+    if (!pImpl->initialized) {
+        return;
+    }
+    pImpl->tts.reset();
+    pImpl->initialized = false;
+    pImpl->modelDir.clear();
+    LOGI("TTS: Resources released");
+}
+/**
+ * Writes mono 16-bit PCM audio to `filePath` as a RIFF/WAVE file.
+ *
+ * Samples are clamped to [-1, 1] and scaled by 32767 before conversion to int16.
+ *
+ * @param samples    Float samples to write; must not be empty and must fit the 32-bit RIFF size
+ *                   fields written here.
+ * @param sampleRate Sample rate in Hz; must be positive.
+ * @param filePath   Destination path; the file is created or truncated.
+ * @return true only when the file was opened and every byte was written and flushed
+ *         successfully; false (with an error logged) otherwise.
+ *
+ * NOTE(review): header fields and samples are written in host byte order, while WAV expects
+ * little-endian. Presumably fine on the little-endian iOS targets this file builds for — confirm
+ * before reusing elsewhere.
+ */
+bool TtsWrapper::saveToWavFile(
+    const std::vector<float>& samples,
+    int32_t sampleRate,
+    const std::string& filePath
+) {
+    if (samples.empty()) {
+        LOGE("TTS: Cannot save empty audio samples");
+        return false;
+    }
+    const int32_t numChannels = 1;
+    const int32_t bitsPerSample = 16;
+    const int32_t blockAlign = numChannels * bitsPerSample / 8;
+    // Also reject rates whose byte rate would overflow the 32-bit header field.
+    if (sampleRate <= 0 || sampleRate > 0x7FFFFFFF / blockAlign) {
+        LOGE("TTS: Invalid sample rate: %d", sampleRate);
+        return false;
+    }
+    // The data size and the RIFF chunk size (36 + data) are stored as int32_t; refuse input that
+    // cannot be represented instead of silently truncating the sample count.
+    const size_t maxSamples =
+        static_cast<size_t>(0x7FFFFFFF - 36) / static_cast<size_t>(blockAlign);
+    if (samples.size() > maxSamples) {
+        LOGE("TTS: Too many samples for a WAV file: %zu", samples.size());
+        return false;
+    }
+    try {
+        std::ofstream outfile(filePath, std::ios::binary);
+        if (!outfile) {
+            LOGE("TTS: Failed to open output file: %s", filePath.c_str());
+            return false;
+        }
+        const int32_t byteRate = sampleRate * blockAlign;
+        const int32_t dataSize = static_cast<int32_t>(samples.size()) * blockAlign;
+        const int32_t chunkSize = 36 + dataSize;
+        // RIFF header.
+        outfile.write("RIFF", 4);
+        outfile.write(reinterpret_cast<const char*>(&chunkSize), 4);
+        outfile.write("WAVE", 4);
+        // "fmt " sub-chunk: 16 bytes describing uncompressed PCM (format tag 1).
+        outfile.write("fmt ", 4);
+        const int32_t subchunk1Size = 16;
+        outfile.write(reinterpret_cast<const char*>(&subchunk1Size), 4);
+        const int16_t audioFormat = 1;
+        outfile.write(reinterpret_cast<const char*>(&audioFormat), 2);
+        const int16_t numChannelsInt16 = static_cast<int16_t>(numChannels);
+        outfile.write(reinterpret_cast<const char*>(&numChannelsInt16), 2);
+        outfile.write(reinterpret_cast<const char*>(&sampleRate), 4);
+        outfile.write(reinterpret_cast<const char*>(&byteRate), 4);
+        const int16_t blockAlignInt16 = static_cast<int16_t>(blockAlign);
+        outfile.write(reinterpret_cast<const char*>(&blockAlignInt16), 2);
+        const int16_t bitsPerSampleInt16 = static_cast<int16_t>(bitsPerSample);
+        outfile.write(reinterpret_cast<const char*>(&bitsPerSampleInt16), 2);
+        // "data" sub-chunk.
+        outfile.write("data", 4);
+        outfile.write(reinterpret_cast<const char*>(&dataSize), 4);
+        // Convert and write in fixed-size chunks: one stream call per chunk rather than per
+        // sample, without allocating a second full-size buffer.
+        const size_t chunkCapacity = 4096;
+        int16_t chunk[4096];
+        for (size_t offset = 0; offset < samples.size(); offset += chunkCapacity) {
+            const size_t count = std::min(chunkCapacity, samples.size() - offset);
+            for (size_t i = 0; i < count; ++i) {
+                const float clamped = std::max(-1.0f, std::min(1.0f, samples[offset + i]));
+                chunk[i] = static_cast<int16_t>(clamped * 32767.0f);
+            }
+            outfile.write(reinterpret_cast<const char*>(chunk),
+                          static_cast<std::streamsize>(count * sizeof(int16_t)));
+        }
+        outfile.close();
+        // Any failed write above leaves the stream in a failed state, and close() adds to it
+        // when the final flush fails; report that instead of claiming success.
+        if (!outfile) {
+            LOGE("TTS: Failed to write WAV data to %s", filePath.c_str());
+            return false;
+        }
+        LOGI("TTS: Successfully saved %zu samples to %s", samples.size(), filePath.c_str());
+        return true;
+    } catch (const std::exception& e) {
+        LOGE("TTS: Exception while saving WAV file: %s", e.what());
+        return false;
+    } catch (...) {
+        LOGE("TTS: Unknown exception while saving WAV file");
+        return false;
+    }
+}
+} // namespace sherpaonnx