nexaai 1.0.29__cp310-cp310-macosx_14_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexaai/__init__.py +99 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +4 -0
- nexaai/asr.py +68 -0
- nexaai/asr_impl/__init__.py +0 -0
- nexaai/asr_impl/mlx_asr_impl.py +93 -0
- nexaai/asr_impl/pybind_asr_impl.py +127 -0
- nexaai/base.py +39 -0
- nexaai/binds/__init__.py +7 -0
- nexaai/binds/asr_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/cpu_gpu/libggml-base.dylib +0 -0
- nexaai/binds/cpu_gpu/libggml-cpu.so +0 -0
- nexaai/binds/cpu_gpu/libggml-metal.so +0 -0
- nexaai/binds/cpu_gpu/libggml.dylib +0 -0
- nexaai/binds/cpu_gpu/libmtmd.dylib +0 -0
- nexaai/binds/cpu_gpu/libnexa_cpu_gpu.dylib +0 -0
- nexaai/binds/cpu_gpu/libnexa_plugin.dylib +0 -0
- nexaai/binds/cv_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/diarize_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/metal/libnexa_plugin.dylib +0 -0
- nexaai/binds/metal/py-lib/ml.py +888 -0
- nexaai/binds/metal/py-lib/mlx_audio/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/binds/metal/py-lib/mlx_audio/server.py +525 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/generate.py +174 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/utils.py +195 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/audio_player.py +120 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/convert.py +71 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/generate.py +449 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/base.py +84 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/utils.py +337 -0
- nexaai/binds/metal/py-lib/mlx_audio/utils.py +237 -0
- nexaai/binds/metal/py-lib/mlx_audio/version.py +1 -0
- nexaai/binds/metal/py-lib/profiling.py +239 -0
- nexaai/binds/nexaml/libfftw3.3.dylib +0 -0
- nexaai/binds/nexaml/libfftw3f.3.dylib +0 -0
- nexaai/binds/nexaml/libggml-base.dylib +0 -0
- nexaai/binds/nexaml/libggml-cpu.so +0 -0
- nexaai/binds/nexaml/libggml-metal.so +0 -0
- nexaai/binds/nexaml/libggml.dylib +0 -0
- nexaai/binds/nexaml/libmp3lame.0.dylib +0 -0
- nexaai/binds/nexaml/libmpg123.0.dylib +0 -0
- nexaai/binds/nexaml/libnexa-mm-process.dylib +0 -0
- nexaai/binds/nexaml/libnexa-sampling.dylib +0 -0
- nexaai/binds/nexaml/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexaml/libnexaproc.dylib +0 -0
- nexaai/binds/nexaml/libomp.dylib +0 -0
- nexaai/binds/nexaml/libqwen3-vl.dylib +0 -0
- nexaai/binds/nexaml/libqwen3vl-vision.dylib +0 -0
- nexaai/binds/rerank_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +106 -0
- nexaai/cv.py +95 -0
- nexaai/cv_impl/__init__.py +0 -0
- nexaai/cv_impl/mlx_cv_impl.py +91 -0
- nexaai/cv_impl/pybind_cv_impl.py +124 -0
- nexaai/diarize.py +80 -0
- nexaai/diarize_impl/__init__.py +1 -0
- nexaai/diarize_impl/pybind_diarize_impl.py +125 -0
- nexaai/embedder.py +73 -0
- nexaai/embedder_impl/__init__.py +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +118 -0
- nexaai/embedder_impl/pybind_embedder_impl.py +96 -0
- nexaai/image_gen.py +141 -0
- nexaai/image_gen_impl/__init__.py +0 -0
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
- nexaai/llm.py +98 -0
- nexaai/llm_impl/__init__.py +0 -0
- nexaai/llm_impl/mlx_llm_impl.py +271 -0
- nexaai/llm_impl/pybind_llm_impl.py +238 -0
- nexaai/log.py +92 -0
- nexaai/mlx_backend/asr/__init__.py +12 -0
- nexaai/mlx_backend/asr/interface.py +122 -0
- nexaai/mlx_backend/common/__init__.py +0 -0
- nexaai/mlx_backend/common/utils.py +25 -0
- nexaai/mlx_backend/cv/__init__.py +0 -0
- nexaai/mlx_backend/cv/generate.py +195 -0
- nexaai/mlx_backend/cv/interface.py +162 -0
- nexaai/mlx_backend/cv/main.py +81 -0
- nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
- nexaai/mlx_backend/embedding/__init__.py +0 -0
- nexaai/mlx_backend/embedding/generate.py +333 -0
- nexaai/mlx_backend/embedding/interface.py +617 -0
- nexaai/mlx_backend/embedding/main.py +173 -0
- nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
- nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
- nexaai/mlx_backend/image_gen/__init__.py +1 -0
- nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
- nexaai/mlx_backend/image_gen/interface.py +82 -0
- nexaai/mlx_backend/image_gen/main.py +281 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
- nexaai/mlx_backend/llm/__init__.py +0 -0
- nexaai/mlx_backend/llm/generate.py +149 -0
- nexaai/mlx_backend/llm/interface.py +764 -0
- nexaai/mlx_backend/llm/main.py +68 -0
- nexaai/mlx_backend/ml.py +888 -0
- nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/mlx_backend/mlx_audio/server.py +525 -0
- nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
- nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
- nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
- nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
- nexaai/mlx_backend/mlx_audio/utils.py +237 -0
- nexaai/mlx_backend/mlx_audio/version.py +1 -0
- nexaai/mlx_backend/profiling.py +239 -0
- nexaai/mlx_backend/rerank/__init__.py +0 -0
- nexaai/mlx_backend/rerank/generate.py +174 -0
- nexaai/mlx_backend/rerank/interface.py +287 -0
- nexaai/mlx_backend/rerank/main.py +127 -0
- nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
- nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
- nexaai/mlx_backend/sd/__init__.py +1 -0
- nexaai/mlx_backend/sd/interface.py +362 -0
- nexaai/mlx_backend/sd/main.py +286 -0
- nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
- nexaai/mlx_backend/sd/modeling/clip.py +116 -0
- nexaai/mlx_backend/sd/modeling/config.py +65 -0
- nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
- nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
- nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
- nexaai/mlx_backend/sd/modeling/unet.py +460 -0
- nexaai/mlx_backend/sd/modeling/vae.py +274 -0
- nexaai/mlx_backend/tts/__init__.py +12 -0
- nexaai/mlx_backend/tts/interface.py +276 -0
- nexaai/mlx_backend/vlm/__init__.py +3 -0
- nexaai/mlx_backend/vlm/generate.py +572 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py +374 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +259 -0
- nexaai/mlx_backend/vlm/interface.py +559 -0
- nexaai/mlx_backend/vlm/main.py +365 -0
- nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
- nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
- nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
- nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
- nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1262 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1308 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
- nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
- nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
- nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
- nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
- nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
- nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
- nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
- nexaai/rerank.py +57 -0
- nexaai/rerank_impl/__init__.py +0 -0
- nexaai/rerank_impl/mlx_rerank_impl.py +94 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +136 -0
- nexaai/runtime.py +68 -0
- nexaai/runtime_error.py +24 -0
- nexaai/tts.py +75 -0
- nexaai/tts_impl/__init__.py +0 -0
- nexaai/tts_impl/mlx_tts_impl.py +94 -0
- nexaai/tts_impl/pybind_tts_impl.py +43 -0
- nexaai/utils/decode.py +18 -0
- nexaai/utils/manifest_utils.py +531 -0
- nexaai/utils/model_manager.py +1745 -0
- nexaai/utils/model_types.py +49 -0
- nexaai/utils/progress_tracker.py +389 -0
- nexaai/utils/quantization_utils.py +245 -0
- nexaai/vlm.py +130 -0
- nexaai/vlm_impl/__init__.py +0 -0
- nexaai/vlm_impl/mlx_vlm_impl.py +259 -0
- nexaai/vlm_impl/pybind_vlm_impl.py +275 -0
- nexaai-1.0.29.dist-info/METADATA +35 -0
- nexaai-1.0.29.dist-info/RECORD +580 -0
- nexaai-1.0.29.dist-info/WHEEL +5 -0
- nexaai-1.0.29.dist-info/top_level.txt +1 -0
nexaai/rerank.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from typing import List, Optional, Sequence, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class RerankConfig:
    """Options passed to a reranker backend for a single rerank call."""

    # Batch size handed to the backend.
    batch_size: int = 1
    # Whether the backend should normalize the returned scores.
    normalize: bool = True
    # Name of the normalization scheme: "softmax" | "min-max" | "none"
    normalize_method: str = "softmax"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Reranker(BaseModel):
    """Backend-agnostic reranker interface.

    Concrete backends implement ``load_model`` and ``rerank``;
    ``_load_from`` selects the backend implementation from ``plugin_id``.
    """

    def __init__(self):
        """Set up the base reranker; no state is created at this level."""

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   model_name: str = None,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None,
                   **kwargs
                   ) -> 'Reranker':
        """Load a reranker from a local path using the backend named by ``plugin_id``.

        Args:
            model_path: Local path of the model.
            model_name: Optional model name forwarded to the backend loader.
            tokenizer_file: Tokenizer file forwarded to the backend loader.
            plugin_id: ``PluginID`` member or its raw string value; ``"mlx"``
                selects the MLX implementation, anything else the PyBind one.
            device_id: Optional device identifier forwarded to the backend loader.
            **kwargs: Accepted but not forwarded to either backend.

        Returns:
            The backend-specific ``Reranker`` instance.
        """
        # Accept either the enum member or its plain string value.
        if isinstance(plugin_id, PluginID):
            backend = plugin_id.value
        else:
            backend = plugin_id

        load_args = (model_path, model_name, tokenizer_file, plugin_id, device_id)

        # Backend modules are imported lazily so only the selected one is loaded.
        if backend != "mlx":
            from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
            return PyBindRerankImpl._load_from(*load_args)

        from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
        return MLXRerankImpl._load_from(*load_args)

    @abstractmethod
    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load the model found at ``model_path``; implemented by each backend."""

    @abstractmethod
    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """Score ``documents`` against ``query``; implemented by each backend."""
|
|
File without changes
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Sequence, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.rerank import Reranker, RerankConfig
|
|
8
|
+
from nexaai.mlx_backend.rerank.interface import Reranker as MLXRerankInterface, create_reranker
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXRerankImpl(Reranker):
    """Reranker implementation that delegates to the MLX backend reranker."""

    def __init__(self):
        """Initialize MLX Rerank implementation."""
        super().__init__()
        # Backend reranker object; set by _load_from and cleared by eject.
        self._mlx_reranker = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   model_name: str = None,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None
                   ) -> 'MLXRerankImpl':
        """Load reranker model from local path using MLX backend.

        Args:
            model_path: Local path of the model; passed to both
                ``create_reranker`` and the backend's ``load_model``.
            model_name: Accepted for signature parity; not used by this backend.
            tokenizer_file: Passed to ``create_reranker`` as ``tokenizer_path``.
            plugin_id: Accepted for signature parity; not used here.
            device_id: Passed to ``create_reranker`` as ``device``.

        Returns:
            A loaded ``MLXRerankImpl`` instance.

        Raises:
            RuntimeError: If creation or loading fails; the underlying
                exception is attached as ``__cause__``.
        """
        try:
            # Create instance and load MLX reranker
            instance = cls()
            instance._mlx_reranker = create_reranker(
                model_path=model_path,
                # model_name=model_name, # FIXME: For MLX Reranker, model_name is not used
                tokenizer_path=tokenizer_file,
                device=device_id
            )

            # Load the model
            success = instance._mlx_reranker.load_model(model_path)
            if not success:
                raise RuntimeError("Failed to load MLX reranker model")

            return instance
        except Exception as e:
            # Chain explicitly so the backend error is reported as the direct cause.
            raise RuntimeError(f"Failed to load MLX Reranker: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_reranker:
            self._mlx_reranker.destroy()
            self._mlx_reranker = None

    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load model from path.

        Args:
            model_path: Path handed to the backend's ``load_model``.
            extra_data: Optional extra argument handed to the backend unchanged.

        Returns:
            Whatever the backend's ``load_model`` returns.

        Raises:
            RuntimeError: If no backend reranker exists, or the backend raises
                (the backend error is attached as ``__cause__``).
        """
        if not self._mlx_reranker:
            raise RuntimeError("MLX Reranker not initialized")

        try:
            return self._mlx_reranker.load_model(model_path, extra_data)
        except Exception as e:
            raise RuntimeError(f"Failed to load reranker model: {str(e)}") from e

    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """Rerank documents given a query.

        Args:
            query: Query text.
            documents: Documents to score against the query.
            config: Optional settings; converted to the MLX config type when
                given, otherwise ``None`` is passed to the backend.

        Returns:
            The backend scores converted with ``tolist()``.

        Raises:
            RuntimeError: If no backend reranker is loaded, or reranking fails
                (the backend error is attached as ``__cause__``).
        """
        if not self._mlx_reranker:
            raise RuntimeError("MLX Reranker not loaded")

        try:
            # Convert our config to MLX format if provided
            mlx_config = None
            if config:
                from nexaai.mlx_backend.rerank.interface import RerankConfig as MLXRerankConfig

                mlx_config = MLXRerankConfig(
                    batch_size=config.batch_size,
                    normalize=config.normalize,
                    normalize_method=config.normalize_method
                )

            # Use MLX reranking
            scores = self._mlx_reranker.rerank(query, documents, mlx_config)

            # Convert mx.array to Python list of floats
            return scores.tolist()

        except Exception as e:
            raise RuntimeError(f"Failed to rerank documents: {str(e)}") from e
|
|
93
|
+
|
|
94
|
+
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from typing import List, Optional, Sequence, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID
|
|
5
|
+
from nexaai.rerank import Reranker, RerankConfig
|
|
6
|
+
from nexaai.binds import rerank_bind, common_bind
|
|
7
|
+
from nexaai.runtime import _ensure_runtime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PyBindRerankImpl(Reranker):
    """Reranker implementation that calls the ``rerank_bind`` native bindings."""

    def __init__(self, _handle_ptr):
        """
        Internal initializer; instances are normally built by ``_load_from``.

        Args:
            _handle_ptr: Capsule handle to the C++ reranker object
        """
        super().__init__()
        self._handle = _handle_ptr

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   model_name: str = None,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'PyBindRerankImpl':
        """
        Create a native reranker for a local model and wrap its handle.

        Args:
            model_path: Path to the model file
            model_name: Name of the model (optional)
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)
            device_id: Device ID to use for the model (optional)

        Returns:
            PyBindRerankImpl instance
        """
        # The native runtime has to be initialised before any binding call.
        _ensure_runtime()

        # The binding takes the plugin id as a plain string.
        if isinstance(plugin_id, PluginID):
            plugin_id_str = plugin_id.value
        else:
            plugin_id_str = plugin_id

        # A default-constructed model config is passed to the binding.
        model_config = common_bind.ModelConfig()

        return cls(
            rerank_bind.ml_reranker_create(
                model_path,
                model_name,
                tokenizer_file,
                model_config,
                plugin_id_str,
                device_id
            )
        )

    def eject(self):
        """
        Drop the native handle so the reranker is released.
        """
        # Destructor of the handle will unload the model correctly
        if getattr(self, '_handle', None) is None:
            return
        del self._handle
        self._handle = None

    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """
        Unsupported for this backend; models are loaded through ``_load_from``.

        Args:
            model_path: Path to the model file
            extra_data: Additional data (unused)

        Raises:
            NotImplementedError: Always.
        """
        # Present only to satisfy the abstract interface; creation happens in
        # _load_from.
        raise NotImplementedError("Use _load_from class method to load models")

    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """
        Score documents against a query through the native binding.

        Args:
            query: Query text as UTF-8 string
            documents: List of document texts to rerank
            config: Optional reranking configuration; defaults are used when omitted

        Returns:
            List of ranking scores (one per document); an empty list when the
            binding result holds no numpy array under "scores"

        Raises:
            RuntimeError: If the handle has been released.
        """
        if self._handle is None:
            raise RuntimeError("Reranker handle is None. Model may have been ejected.")

        settings = RerankConfig() if config is None else config

        # Mirror the Python-side settings onto the binding's config object.
        bind_config = rerank_bind.RerankConfig()
        bind_config.batch_size = settings.batch_size
        bind_config.normalize = settings.normalize
        bind_config.normalize_method = settings.normalize_method

        # The binding returns a dict with scores and profile_data.
        result = rerank_bind.ml_reranker_rerank(
            self._handle,
            query,
            list(documents),
            bind_config
        )

        scores_array = result.get("scores", np.array([]))
        if not isinstance(scores_array, np.ndarray):
            return []
        return scores_array.tolist()
|
nexaai/runtime.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import atexit
|
|
3
|
+
import threading
|
|
4
|
+
from typing import Optional, Any
|
|
5
|
+
|
|
6
|
+
from nexaai.binds import common_bind
|
|
7
|
+
|
|
8
|
+
_init_lock = threading.Lock()  # guards runtime initialisation
_runtime_alive = False  # global flag: True while the runtime is initialised


def _ensure_runtime() -> None:
    """Bring the native runtime up on first use; later calls return immediately."""
    global _runtime_alive
    if _runtime_alive:
        return
    with _init_lock:
        # double-checked locking: another thread may have initialised the
        # runtime while this one was waiting for the lock
        if _runtime_alive:
            return
        common_bind.ml_init()
        _runtime_alive = True
        atexit.register(_shutdown_runtime)
|
|
20
|
+
|
|
21
|
+
def _shutdown_runtime() -> None:
    """Deinitialise the runtime if it is up; a no-op otherwise (atexit hook)."""
    global _runtime_alive
    if not _runtime_alive:
        return
    common_bind.ml_deinit()
    _runtime_alive = False


# Public alias so advanced users can tear the runtime down on demand
shutdown = _shutdown_runtime
|
|
30
|
+
|
|
31
|
+
def is_initialized() -> bool:
    """Report the current value of the module-level runtime flag."""
    alive = _runtime_alive
    return alive
|
|
34
|
+
|
|
35
|
+
# ----------------------------------------------------------------------
|
|
36
|
+
# Single public class
|
|
37
|
+
# ----------------------------------------------------------------------
|
|
38
|
+
class Session:
    """
    Runtime guard usable directly or as a context manager.

        with Session("foo.mdl") as sess:
            ...

    Creating a Session lazily initialises the global runtime through
    ``_ensure_runtime()``. Closing a Session does not shut the runtime down.
    """

    # ---- construction -------------------------------------------------
    def __init__(self, model_path: str) -> None:
        # model_path is accepted but not used here: the constructor only
        # makes sure the runtime is initialised.
        _ensure_runtime()

    def close(self) -> None:
        """Release session resources.

        The session currently owns nothing, so this is a no-op that is safe
        to call any number of times. It must exist because ``__exit__`` and
        ``__del__`` both call it; without it, leaving a ``with`` block raised
        AttributeError.
        """

    # safety net – make GC close the model
    def __del__(self) -> None:
        try:
            self.close()
        except Exception:
            # Best effort only: errors during finalisation are deliberately ignored.
            pass

    # allow `with Session(...) as s:` syntax
    def __enter__(self) -> "Session":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        # Returns None, so exceptions raised inside the with-block propagate.
        self.close()
|
nexaai/runtime_error.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Runtime errors for Nexa SDK operations."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class NexaRuntimeError(Exception):
    """Root of the Nexa runtime exception hierarchy.

    Attributes:
        error_code: Optional numeric code supplied by the raiser (None if absent).
    """

    def __init__(self, message: str, error_code: int = None):
        super().__init__(message)
        self.error_code = error_code
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContextLengthExceededError(NexaRuntimeError):
    """Signals that the input context is longer than the model's maximum."""

    def __init__(self, message: str = "Input context length exceeded model's maximum", error_code: int = None):
        # Only the default message differs from the base class.
        super().__init__(message=message, error_code=error_code)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class GenerationError(NexaRuntimeError):
    """Signals that a generation call failed."""

    def __init__(self, message: str = "Generation failed", error_code: int = None):
        # Only the default message differs from the base class.
        super().__init__(message=message, error_code=error_code)
|
|
24
|
+
|
nexaai/tts.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class TTSConfig:
    """Per-request text-to-speech settings."""

    # Voice name; "default" unless overridden.
    voice: str = "default"
    # Speed value passed to the backend.
    speed: float = 1.0
    # Seed value; -1 for random
    seed: int = -1
    # Sample rate passed to the backend.
    sample_rate: int = 22050
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class TTSSamplerConfig:
    """Sampling settings for text-to-speech."""

    # Sampling temperature.
    temperature: float = 1.0
    # Noise scale value.
    noise_scale: float = 0.667
    # Length scale value.
    length_scale: float = 1.0
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class TTSResult:
    """Outcome of a text-to-speech synthesis call."""

    # Path where the synthesized audio is saved
    audio_path: str
    # Duration of the audio, in seconds.
    duration_seconds: float
    # Sample rate of the audio.
    sample_rate: int
    # Channel count of the audio.
    channels: int
    # Sample count of the audio.
    num_samples: int
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TTS(BaseModel):
    """Backend-agnostic Text-to-Speech interface.

    Concrete backends implement ``synthesize`` and ``list_available_voices``;
    ``_load_from`` selects the backend implementation from ``plugin_id``.
    """

    def __init__(self):
        """Set up the base TTS object; no state is created at this level."""

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None,
                   **kwargs
                   ) -> 'TTS':
        """Load a TTS model from a local path using the backend named by ``plugin_id``.

        Args:
            model_path: Local path of the model.
            vocoder_path: Vocoder path forwarded to the backend loader.
            plugin_id: ``PluginID`` member or its raw string value; ``"mlx"``
                selects the MLX implementation, anything else the PyBind one.
            device_id: Optional device identifier forwarded to the backend loader.
            **kwargs: Accepted but not forwarded to either backend.

        Returns:
            The backend-specific ``TTS`` instance.
        """
        # Accept either the enum member or its plain string value.
        if isinstance(plugin_id, PluginID):
            backend = plugin_id.value
        else:
            backend = plugin_id

        load_args = (model_path, vocoder_path, plugin_id, device_id)

        # Backend modules are imported lazily so only the selected one is loaded.
        if backend != "mlx":
            from nexaai.tts_impl.pybind_tts_impl import PyBindTTSImpl
            return PyBindTTSImpl._load_from(*load_args)

        from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
        return MLXTTSImpl._load_from(*load_args)

    @abstractmethod
    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Turn ``text`` into speech saved on the filesystem; implemented by each backend."""

    @abstractmethod
    def list_available_voices(self) -> List[str]:
        """Return the voice names the backend offers; implemented by each backend."""
|
|
File without changes
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
8
|
+
from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXTTSImpl(TTS):
    """TTS implementation that delegates to the MLX backend TTS interface."""

    def __init__(self):
        """Initialize MLX TTS implementation."""
        super().__init__()
        # Backend TTS object; set by _load_from and cleared by eject.
        self._mlx_tts = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None
                   ) -> 'MLXTTSImpl':
        """Load TTS model from local path using MLX backend.

        Args:
            model_path: Passed to the MLX TTS interface as ``model_path``.
            vocoder_path: Passed to the MLX TTS interface as ``vocoder_path``.
            plugin_id: Accepted for signature parity; not used here.
            device_id: Passed to the MLX TTS interface as ``device``.

        Returns:
            An ``MLXTTSImpl`` wrapping the created backend object.

        Raises:
            RuntimeError: If construction fails; the underlying exception is
                attached as ``__cause__``.
        """
        try:
            # Create instance and load MLX TTS
            instance = cls()
            instance._mlx_tts = MLXTTSInterface(
                model_path=model_path,
                vocoder_path=vocoder_path,
                device=device_id
            )

            return instance
        except Exception as e:
            # Chain explicitly so the backend error is reported as the direct cause.
            raise RuntimeError(f"Failed to load MLX TTS: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_tts:
            self._mlx_tts.destroy()
            self._mlx_tts = None

    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Synthesize speech from text and save to filesystem.

        Args:
            text: Text to synthesize.
            config: Optional settings; converted to the MLX config type when
                given, otherwise ``None`` is passed to the backend.
            output_path: Optional output path handed to the backend unchanged.

        Returns:
            A ``TTSResult`` copied field by field from the backend result.

        Raises:
            RuntimeError: If no backend TTS is loaded, or synthesis fails
                (the backend error is attached as ``__cause__``).
        """
        if not self._mlx_tts:
            raise RuntimeError("MLX TTS not loaded")

        try:
            # Convert our config to MLX format if provided
            mlx_config = None
            if config:
                from nexaai.mlx_backend.ml import TTSConfig as MLXTTSConfig

                mlx_config = MLXTTSConfig(
                    voice=config.voice,
                    speed=config.speed,
                    seed=config.seed,
                    sample_rate=config.sample_rate
                )

            # Use MLX TTS synthesis
            result = self._mlx_tts.synthesize(text, mlx_config, output_path)

            # Convert MLX result to our format
            return TTSResult(
                audio_path=result.audio_path,
                duration_seconds=result.duration_seconds,
                sample_rate=result.sample_rate,
                channels=result.channels,
                num_samples=result.num_samples
            )

        except Exception as e:
            raise RuntimeError(f"Failed to synthesize speech: {str(e)}") from e

    def list_available_voices(self) -> List[str]:
        """List available voices.

        Returns:
            Whatever the backend's ``list_available_voices`` returns.

        Raises:
            RuntimeError: If no backend TTS is loaded, or the backend raises
                (the backend error is attached as ``__cause__``).
        """
        if not self._mlx_tts:
            raise RuntimeError("MLX TTS not loaded")

        try:
            return self._mlx_tts.list_available_voices()
        except Exception as e:
            raise RuntimeError(f"Failed to list available voices: {str(e)}") from e
|
|
93
|
+
|
|
94
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from nexaai.common import PluginID
|
|
4
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PyBindTTSImpl(TTS):
    """Placeholder PyBind TTS backend; synthesis and voice listing are not implemented."""

    def __init__(self):
        """Create the stub instance; there is no backend state yet."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'PyBindTTSImpl':
        """Return a fresh stub instance; none of the arguments are used yet."""
        # TODO: Implement PyBind TTS loading
        return cls()

    def eject(self):
        """Release resources; currently does nothing."""
        # TODO: Implement PyBind TTS cleanup
        return None

    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Not available for this backend yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: Implement PyBind TTS synthesis
        raise NotImplementedError("PyBind TTS synthesis not yet implemented")

    def list_available_voices(self) -> List[str]:
        """Not available for this backend yet.

        Raises:
            NotImplementedError: Always.
        """
        # TODO: Implement PyBind TTS voice listing
        raise NotImplementedError("PyBind TTS voice listing not yet implemented")
|
nexaai/utils/decode.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for text decoding operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def safe_decode(data):
    """
    Safely decode bytes or text, handling UTF-8 errors.

    Args:
        data: Input data. ``bytes``, ``bytearray`` and ``memoryview`` are
            decoded as UTF-8; anything else is converted with ``str()``.

    Returns:
        str: Decoded string, with invalid UTF-8 sequences replaced by U+FFFD
    """
    if isinstance(data, (bytearray, memoryview)):
        # Other bytes-like inputs would otherwise fall through to str() and
        # yield their repr (e.g. "bytearray(b'hi')") instead of their text.
        data = bytes(data)
    if isinstance(data, bytes):
        return data.decode('utf-8', errors='replace')
    return str(data)
|