nexaai 1.0.16rc13__cp310-cp310-macosx_15_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic. Click here for more details.
- nexaai/__init__.py +83 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +4 -0
- nexaai/asr.py +64 -0
- nexaai/asr_impl/__init__.py +0 -0
- nexaai/asr_impl/mlx_asr_impl.py +92 -0
- nexaai/asr_impl/pybind_asr_impl.py +44 -0
- nexaai/base.py +39 -0
- nexaai/binds/__init__.py +4 -0
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/py-lib/ml.py +888 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/server.py +525 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/generate.py +174 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/utils.py +195 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/audio_player.py +120 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/convert.py +71 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/generate.py +449 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/base.py +84 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/utils.py +337 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/utils.py +237 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/version.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/profiling.py +239 -0
- nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
- nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
- nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
- nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa-mm-process.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa-sampling.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexaproc.dylib +0 -0
- nexaai/binds/nexa_nexaml/libqwen3-vl.dylib +0 -0
- nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +104 -0
- nexaai/cv.py +92 -0
- nexaai/cv_impl/__init__.py +0 -0
- nexaai/cv_impl/mlx_cv_impl.py +89 -0
- nexaai/cv_impl/pybind_cv_impl.py +32 -0
- nexaai/embedder.py +72 -0
- nexaai/embedder_impl/__init__.py +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +116 -0
- nexaai/embedder_impl/pybind_embedder_impl.py +95 -0
- nexaai/image_gen.py +140 -0
- nexaai/image_gen_impl/__init__.py +0 -0
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
- nexaai/llm.py +96 -0
- nexaai/llm_impl/__init__.py +0 -0
- nexaai/llm_impl/mlx_llm_impl.py +269 -0
- nexaai/llm_impl/pybind_llm_impl.py +218 -0
- nexaai/log.py +92 -0
- nexaai/mlx_backend/asr/__init__.py +12 -0
- nexaai/mlx_backend/asr/interface.py +122 -0
- nexaai/mlx_backend/common/__init__.py +0 -0
- nexaai/mlx_backend/common/utils.py +25 -0
- nexaai/mlx_backend/cv/__init__.py +0 -0
- nexaai/mlx_backend/cv/generate.py +195 -0
- nexaai/mlx_backend/cv/interface.py +151 -0
- nexaai/mlx_backend/cv/main.py +81 -0
- nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
- nexaai/mlx_backend/embedding/__init__.py +0 -0
- nexaai/mlx_backend/embedding/generate.py +333 -0
- nexaai/mlx_backend/embedding/interface.py +617 -0
- nexaai/mlx_backend/embedding/main.py +173 -0
- nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
- nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
- nexaai/mlx_backend/image_gen/__init__.py +1 -0
- nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
- nexaai/mlx_backend/image_gen/interface.py +82 -0
- nexaai/mlx_backend/image_gen/main.py +281 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
- nexaai/mlx_backend/llm/__init__.py +0 -0
- nexaai/mlx_backend/llm/generate.py +149 -0
- nexaai/mlx_backend/llm/interface.py +764 -0
- nexaai/mlx_backend/llm/main.py +68 -0
- nexaai/mlx_backend/ml.py +888 -0
- nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/mlx_backend/mlx_audio/server.py +525 -0
- nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
- nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
- nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
- nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
- nexaai/mlx_backend/mlx_audio/utils.py +237 -0
- nexaai/mlx_backend/mlx_audio/version.py +1 -0
- nexaai/mlx_backend/profiling.py +239 -0
- nexaai/mlx_backend/rerank/__init__.py +0 -0
- nexaai/mlx_backend/rerank/generate.py +174 -0
- nexaai/mlx_backend/rerank/interface.py +287 -0
- nexaai/mlx_backend/rerank/main.py +127 -0
- nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
- nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
- nexaai/mlx_backend/sd/__init__.py +1 -0
- nexaai/mlx_backend/sd/interface.py +362 -0
- nexaai/mlx_backend/sd/main.py +286 -0
- nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
- nexaai/mlx_backend/sd/modeling/clip.py +116 -0
- nexaai/mlx_backend/sd/modeling/config.py +65 -0
- nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
- nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
- nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
- nexaai/mlx_backend/sd/modeling/unet.py +460 -0
- nexaai/mlx_backend/sd/modeling/vae.py +274 -0
- nexaai/mlx_backend/tts/__init__.py +12 -0
- nexaai/mlx_backend/tts/interface.py +276 -0
- nexaai/mlx_backend/vlm/__init__.py +3 -0
- nexaai/mlx_backend/vlm/generate.py +572 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py +261 -0
- nexaai/mlx_backend/vlm/interface.py +415 -0
- nexaai/mlx_backend/vlm/main.py +316 -0
- nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
- nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
- nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
- nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
- nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
- nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
- nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
- nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
- nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
- nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
- nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
- nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
- nexaai/rerank.py +55 -0
- nexaai/rerank_impl/__init__.py +0 -0
- nexaai/rerank_impl/mlx_rerank_impl.py +92 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +43 -0
- nexaai/runtime.py +68 -0
- nexaai/tts.py +74 -0
- nexaai/tts_impl/__init__.py +0 -0
- nexaai/tts_impl/mlx_tts_impl.py +94 -0
- nexaai/tts_impl/pybind_tts_impl.py +43 -0
- nexaai/utils/avatar_fetcher.py +104 -0
- nexaai/utils/decode.py +18 -0
- nexaai/utils/manifest_utils.py +324 -0
- nexaai/utils/model_manager.py +1353 -0
- nexaai/utils/model_types.py +47 -0
- nexaai/utils/progress_tracker.py +385 -0
- nexaai/utils/quantization_utils.py +245 -0
- nexaai/vlm.py +128 -0
- nexaai/vlm_impl/__init__.py +0 -0
- nexaai/vlm_impl/mlx_vlm_impl.py +258 -0
- nexaai/vlm_impl/pybind_vlm_impl.py +230 -0
- nexaai-1.0.16rc13.dist-info/METADATA +32 -0
- nexaai-1.0.16rc13.dist-info/RECORD +557 -0
- nexaai-1.0.16rc13.dist-info/WHEEL +5 -0
- nexaai-1.0.16rc13.dist-info/top_level.txt +1 -0
nexaai/rerank.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import List, Optional, Sequence, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class RerankConfig:
    """Options accepted by ``Reranker.rerank``.

    Every field has a default, so ``RerankConfig()`` is a usable configuration.
    """

    # Batch size requested for a rerank call.
    batch_size: int = 1
    # Whether scores should be normalized.
    normalize: bool = True
    # Normalization scheme name: "softmax" | "min-max" | "none"
    normalize_method: str = "softmax"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Reranker(BaseModel):
    """Abstract interface implemented by every reranker backend."""

    def __init__(self):
        """Set up the base Reranker; no state is initialised here."""
        pass

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'Reranker':
        """Load a reranker from a local path via the backend chosen by ``plugin_id``.

        Args:
            model_path: Local path of the model.
            tokenizer_file: Tokenizer file handed to the backend loader.
            plugin_id: Backend selector, as a ``PluginID`` member or its raw
                string value. ``"mlx"`` selects the MLX implementation; any
                other value selects the PyBind implementation.
            device_id: Optional device identifier handed to the backend loader.

        Returns:
            The instance produced by the selected implementation's ``_load_from``.
        """
        # Reduce an enum member to its raw value so one comparison covers both forms.
        if isinstance(plugin_id, PluginID):
            backend_name = plugin_id.value
        else:
            backend_name = plugin_id

        # Backend modules are imported lazily so only the selected one is loaded.
        if backend_name != "mlx":
            from nexaai.rerank_impl.pybind_rerank_impl import PyBindRerankImpl
            return PyBindRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)

        from nexaai.rerank_impl.mlx_rerank_impl import MLXRerankImpl
        return MLXRerankImpl._load_from(model_path, tokenizer_file, plugin_id, device_id)

    @abstractmethod
    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load the model found at ``model_path``; implemented by subclasses."""
        pass

    @abstractmethod
    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """Score ``documents`` against ``query``; implemented by subclasses."""
        pass
|
|
File without changes
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Sequence, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.rerank import Reranker, RerankConfig
|
|
8
|
+
from nexaai.mlx_backend.rerank.interface import Reranker as MLXRerankInterface, create_reranker
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXRerankImpl(Reranker):
    """Reranker implementation that delegates to the MLX backend reranker."""

    def __init__(self):
        """Initialize MLX Rerank implementation."""
        super().__init__()
        # Backend handle; assigned by _load_from() and cleared by eject().
        self._mlx_reranker = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None
                   ) -> 'MLXRerankImpl':
        """Load reranker model from local path using MLX backend.

        Args:
            model_path: Local path of the model.
            tokenizer_file: Passed to ``create_reranker`` as ``tokenizer_path``.
            plugin_id: Accepted for signature parity with the other backends;
                not used here.
            device_id: Passed to ``create_reranker`` as ``device``.

        Returns:
            A new instance whose backend reranker reported a successful load.

        Raises:
            RuntimeError: If creating the backend or loading the model fails.
                The original exception is attached as ``__cause__``.
        """
        try:
            # Create instance and load MLX reranker
            instance = cls()
            instance._mlx_reranker = create_reranker(
                model_path=model_path,
                tokenizer_path=tokenizer_file,
                device=device_id
            )

            # Load the model
            success = instance._mlx_reranker.load_model(model_path)
            if not success:
                raise RuntimeError("Failed to load MLX reranker model")

            return instance
        except Exception as e:
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise RuntimeError(f"Failed to load MLX Reranker: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_reranker:
            self._mlx_reranker.destroy()
            self._mlx_reranker = None

    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load model from path.

        Args:
            model_path: Local path of the model.
            extra_data: Optional value forwarded unchanged to the backend.

        Returns:
            Whatever the backend's ``load_model`` returns.

        Raises:
            RuntimeError: If no backend reranker is attached or the backend
                call raises.
        """
        if not self._mlx_reranker:
            raise RuntimeError("MLX Reranker not initialized")

        try:
            return self._mlx_reranker.load_model(model_path, extra_data)
        except Exception as e:
            raise RuntimeError(f"Failed to load reranker model: {str(e)}") from e

    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """Rerank documents given a query.

        Args:
            query: Query text.
            documents: Candidate documents to score.
            config: Optional settings; when omitted, ``None`` is passed to the
                backend.

        Returns:
            The backend scores converted with ``tolist()``
            (presumably one score per document -- confirm against the backend).

        Raises:
            RuntimeError: If no backend reranker is attached or scoring fails.
        """
        if not self._mlx_reranker:
            raise RuntimeError("MLX Reranker not loaded")

        try:
            # Convert our config to MLX format if provided
            mlx_config = None
            if config is not None:
                from nexaai.mlx_backend.rerank.interface import RerankConfig as MLXRerankConfig

                mlx_config = MLXRerankConfig(
                    batch_size=config.batch_size,
                    normalize=config.normalize,
                    normalize_method=config.normalize_method
                )

            # Use MLX reranking
            scores = self._mlx_reranker.rerank(query, documents, mlx_config)

            # Convert mx.array to Python list of floats
            return scores.tolist()

        except Exception as e:
            raise RuntimeError(f"Failed to rerank documents: {str(e)}") from e
|
|
91
|
+
|
|
92
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import List, Optional, Sequence, Union
|
|
2
|
+
|
|
3
|
+
from nexaai.common import PluginID
|
|
4
|
+
from nexaai.rerank import Reranker, RerankConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PyBindRerankImpl(Reranker):
    """Placeholder reranker for the PyBind backend; loading and scoring are not implemented."""

    def __init__(self):
        """Create the PyBind reranker wrapper."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   tokenizer_file: str = "tokenizer.json",
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'PyBindRerankImpl':
        """Return a fresh instance; none of the arguments are used yet."""
        # TODO: Implement PyBind reranker loading
        return cls()

    def eject(self):
        """Release model resources; currently a no-op."""
        # TODO: Implement PyBind reranker cleanup
        return None

    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Not implemented for this backend; always raises ``NotImplementedError``."""
        # TODO: Implement PyBind reranker model loading
        raise NotImplementedError("PyBind reranker model loading not yet implemented")

    def rerank(
        self,
        query: str,
        documents: Sequence[str],
        config: Optional[RerankConfig] = None,
    ) -> List[float]:
        """Not implemented for this backend; always raises ``NotImplementedError``."""
        # TODO: Implement PyBind reranking
        raise NotImplementedError("PyBind reranking not yet implemented")
|
nexaai/runtime.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import atexit
|
|
3
|
+
import threading
|
|
4
|
+
from typing import Optional, Any
|
|
5
|
+
|
|
6
|
+
from nexaai.binds import common_bind
|
|
7
|
+
|
|
8
|
+
_init_lock = threading.Lock()
_runtime_alive = False  # global flag
_atexit_registered = False  # True once the exit hook has been installed


def _ensure_runtime() -> None:
    """Initialise the runtime exactly once (thread-safe, lazy).

    Calls ``common_bind.ml_init()`` when the runtime is not marked alive and
    installs ``_shutdown_runtime`` as an ``atexit`` hook. The hook is installed
    only once per process, so re-initialising after a shutdown does not pile
    up duplicate exit callbacks.
    """
    global _runtime_alive, _atexit_registered
    if _runtime_alive:
        return  # fast path: no lock needed once the flag is set
    with _init_lock:
        if _runtime_alive:  # double-checked locking
            return
        common_bind.ml_init()
        _runtime_alive = True
        if not _atexit_registered:
            atexit.register(_shutdown_runtime)
            _atexit_registered = True
|
|
20
|
+
|
|
21
|
+
def _shutdown_runtime() -> None:
    """Tear the runtime down; idempotent and registered with atexit.

    Calls ``common_bind.ml_deinit()`` only while the runtime is marked alive,
    then clears the flag, so repeated calls are harmless.
    """
    global _runtime_alive
    # Hold the same lock as _ensure_runtime() so that concurrent shutdown()
    # calls cannot both run ml_deinit(), and init/deinit cannot interleave.
    with _init_lock:
        if _runtime_alive:
            common_bind.ml_deinit()
            _runtime_alive = False


# Public helper so advanced users can reclaim memory on demand
shutdown = _shutdown_runtime
|
|
30
|
+
|
|
31
|
+
def is_initialized() -> bool:
    """Report whether the runtime is currently marked as alive."""
    alive = _runtime_alive
    return alive
|
|
34
|
+
|
|
35
|
+
# ----------------------------------------------------------------------
|
|
36
|
+
# Single public class
|
|
37
|
+
# ----------------------------------------------------------------------
|
|
38
|
+
class Session:
    """
    Model session **and** runtime guard in one object.

        sess = Session("foo.mdl")
        ...
        sess.close()            # optional

    or, as a context manager::

        with Session("foo.mdl") as sess:
            ...

    The global runtime is initialised lazily when the first Session
    is created and stays alive until:
      • the interpreter exits, or
      • the module-level `shutdown()` is called.

    Closing a session does not shut the global runtime down.
    """

    # ---- construction -------------------------------------------------
    def __init__(self, model_path: str) -> None:
        """Ensure the runtime is up and remember ``model_path``.

        Args:
            model_path: Path of the model this session refers to. It is only
                stored; this class does not load anything from it.
        """
        _ensure_runtime()
        self.model_path = model_path
        self._closed = False

    def close(self) -> None:
        """Mark the session closed; safe to call any number of times.

        ``__exit__`` and ``__del__`` both call this method, so it must exist
        and must not depend on ``__init__`` having completed.
        """
        self._closed = True

    # safety net – make GC close the model
    def __del__(self) -> None:
        try:
            self.close()
        except Exception:
            # Finalizers must never raise (e.g. during interpreter teardown).
            pass

    # allow `with Session(...) as s:` syntax
    def __enter__(self) -> "Session":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()
|
nexaai/tts.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class TTSConfig:
    """Synthesis options for a TTS request; all fields have defaults."""

    # Voice name.
    voice: str = "default"
    # Speed value.
    speed: float = 1.0
    # Random seed; -1 for random.
    seed: int = -1
    # Sample rate value.
    sample_rate: int = 22050
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class TTSSamplerConfig:
    """Sampling parameters for TTS; all fields have defaults."""

    # Sampling temperature.
    temperature: float = 1.0
    # Noise scale value.
    noise_scale: float = 0.667
    # Length scale value.
    length_scale: float = 1.0
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class TTSResult:
    """Outcome of a TTS synthesis call; every field is required."""

    # Path where the synthesized audio is saved.
    audio_path: str
    # Duration in seconds.
    duration_seconds: float
    # Sample rate value.
    sample_rate: int
    # Channel count.
    channels: int
    # Sample count.
    num_samples: int
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TTS(BaseModel):
    """Abstract interface implemented by every Text-to-Speech backend."""

    def __init__(self):
        """Set up the base TTS object; no state is initialised here."""
        pass

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'TTS':
        """Load a TTS model from a local path via the backend chosen by ``plugin_id``.

        Args:
            model_path: Local path of the model.
            vocoder_path: Vocoder path handed to the backend loader.
            plugin_id: Backend selector, as a ``PluginID`` member or its raw
                string value. ``"mlx"`` selects the MLX implementation; any
                other value selects the PyBind implementation.
            device_id: Optional device identifier handed to the backend loader.

        Returns:
            The instance produced by the selected implementation's ``_load_from``.
        """
        # Reduce an enum member to its raw value so one comparison covers both forms.
        if isinstance(plugin_id, PluginID):
            backend_name = plugin_id.value
        else:
            backend_name = plugin_id

        # Backend modules are imported lazily so only the selected one is loaded.
        if backend_name != "mlx":
            from nexaai.tts_impl.pybind_tts_impl import PyBindTTSImpl
            return PyBindTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)

        from nexaai.tts_impl.mlx_tts_impl import MLXTTSImpl
        return MLXTTSImpl._load_from(model_path, vocoder_path, plugin_id, device_id)

    @abstractmethod
    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Turn ``text`` into audio saved on disk; implemented by subclasses."""
        pass

    @abstractmethod
    def list_available_voices(self) -> List[str]:
        """Return the names of the voices on offer; implemented by subclasses."""
        pass
|
|
File without changes
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
8
|
+
from nexaai.mlx_backend.tts.interface import MlxTts as MLXTTSInterface
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXTTSImpl(TTS):
    """TTS implementation that delegates to the MLX backend ``MlxTts`` object."""

    def __init__(self):
        """Initialize MLX TTS implementation."""
        super().__init__()
        # Backend handle; assigned by _load_from() and cleared by eject().
        self._mlx_tts = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None
                   ) -> 'MLXTTSImpl':
        """Load TTS model from local path using MLX backend.

        Args:
            model_path: Local path of the model.
            vocoder_path: Vocoder path passed to the backend constructor.
            plugin_id: Accepted for signature parity with the other backends;
                not used here.
            device_id: Passed to the backend constructor as ``device``.

        Returns:
            A new instance holding the constructed backend object.

        Raises:
            RuntimeError: If constructing the backend fails. The original
                exception is attached as ``__cause__``.
        """
        try:
            # Create instance and load MLX TTS
            instance = cls()
            instance._mlx_tts = MLXTTSInterface(
                model_path=model_path,
                vocoder_path=vocoder_path,
                device=device_id
            )

            return instance
        except Exception as e:
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise RuntimeError(f"Failed to load MLX TTS: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_tts:
            self._mlx_tts.destroy()
            self._mlx_tts = None

    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Synthesize speech from text and save to filesystem.

        Args:
            text: Text to synthesize.
            config: Optional settings; when omitted, ``None`` is passed to the
                backend.
            output_path: Optional output location forwarded to the backend.

        Returns:
            A ``TTSResult`` copied field by field from the backend result.

        Raises:
            RuntimeError: If no backend is attached or synthesis fails.
        """
        if not self._mlx_tts:
            raise RuntimeError("MLX TTS not loaded")

        try:
            # Convert our config to MLX format if provided
            mlx_config = None
            if config is not None:
                from nexaai.mlx_backend.ml import TTSConfig as MLXTTSConfig

                mlx_config = MLXTTSConfig(
                    voice=config.voice,
                    speed=config.speed,
                    seed=config.seed,
                    sample_rate=config.sample_rate
                )

            # Use MLX TTS synthesis
            result = self._mlx_tts.synthesize(text, mlx_config, output_path)

            # Convert MLX result to our format
            return TTSResult(
                audio_path=result.audio_path,
                duration_seconds=result.duration_seconds,
                sample_rate=result.sample_rate,
                channels=result.channels,
                num_samples=result.num_samples
            )

        except Exception as e:
            raise RuntimeError(f"Failed to synthesize speech: {str(e)}") from e

    def list_available_voices(self) -> List[str]:
        """List available voices.

        Returns:
            Whatever the backend's ``list_available_voices`` returns.

        Raises:
            RuntimeError: If no backend is attached or the backend call raises.
        """
        if not self._mlx_tts:
            raise RuntimeError("MLX TTS not loaded")

        try:
            return self._mlx_tts.list_available_voices()
        except Exception as e:
            raise RuntimeError(f"Failed to list available voices: {str(e)}") from e
|
|
93
|
+
|
|
94
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from nexaai.common import PluginID
|
|
4
|
+
from nexaai.tts import TTS, TTSConfig, TTSResult
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PyBindTTSImpl(TTS):
    """Placeholder TTS for the PyBind backend; synthesis and voice listing are not implemented."""

    def __init__(self):
        """Create the PyBind TTS wrapper."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   vocoder_path: str,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'PyBindTTSImpl':
        """Return a fresh instance; none of the arguments are used yet."""
        # TODO: Implement PyBind TTS loading
        return cls()

    def eject(self):
        """Release model resources; currently a no-op."""
        # TODO: Implement PyBind TTS cleanup
        return None

    def synthesize(
        self,
        text: str,
        config: Optional[TTSConfig] = None,
        output_path: Optional[str] = None,
    ) -> TTSResult:
        """Not implemented for this backend; always raises ``NotImplementedError``."""
        # TODO: Implement PyBind TTS synthesis
        raise NotImplementedError("PyBind TTS synthesis not yet implemented")

    def list_available_voices(self) -> List[str]:
        """Not implemented for this backend; always raises ``NotImplementedError``."""
        # TODO: Implement PyBind TTS voice listing
        raise NotImplementedError("PyBind TTS voice listing not yet implemented")
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Utility for fetching avatar URLs from HuggingFace."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
import httpx
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def fetch_avatar_urls_from_hf_api(query: str, custom_endpoint: Optional[str] = None) -> Dict[str, str]:
    """
    Fetch avatar URLs from HuggingFace models-json endpoint.

    The lookup is best-effort: any failure (network error, non-200 status,
    unexpected payload) is logged as a warning and whatever was collected so
    far, possibly an empty dict, is returned.

    Args:
        query: Search query to fetch models for
        custom_endpoint: Optional custom HuggingFace endpoint

    Returns:
        Dictionary mapping author names to avatar URLs
    """
    avatar_map: Dict[str, str] = {}
    try:
        # Use the base URL from the configured endpoint; drop a trailing slash
        # so joined paths do not end up with "//".
        base_url = (custom_endpoint if custom_endpoint else "https://huggingface.co").rstrip("/")

        # Let httpx encode the query string so spaces, "&", "#" and similar
        # characters in the search text cannot corrupt or inject parameters.
        params = {"sort": "trending", "search": query, "withCount": "true"}

        # Make the HTTP request with a timeout
        with httpx.Client(timeout=2.0) as client:
            response = client.get(f"{base_url}/models-json", params=params)

            if response.status_code == 200:
                models = response.json().get("models", [])

                # Build a map of author names to avatar URLs
                for model in models:
                    author = model.get("author")
                    # "authorData" may be present but null; treat that as empty.
                    author_data = model.get("authorData") or {}
                    avatar_url = author_data.get("avatarUrl")

                    if author and avatar_url:
                        # Handle relative URLs by prepending appropriate base URL
                        if avatar_url.startswith("/"):
                            avatar_url = f"{base_url}{avatar_url}"
                        avatar_map[author] = avatar_url

                logger.debug("Fetched %d avatar URLs from HuggingFace API", len(avatar_map))
            else:
                logger.warning("Failed to fetch avatar URLs: HTTP %s", response.status_code)

    except Exception as e:
        # Deliberately broad: avatars are cosmetic, so never fail the caller.
        logger.warning("Error fetching avatar URLs from HuggingFace API: %s", e)
        # Return the (possibly empty) map on error - callers fall back to default behavior

    return avatar_map
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def get_avatar_url_for_repo(repo_id: str, search_query: Optional[str] = None,
                            custom_endpoint: Optional[str] = None) -> Optional[str]:
    """
    Get avatar URL for a repository ID.

    This method tries multiple strategies, in order:
    1. If search_query is provided, fetch from HuggingFace API with that query
    2. Try fetching with the full repo_id as query
    3. Try fetching with just the organization name as query
    4. Fall back to CDN URL pattern

    A query string that repeats an earlier one is only sent once.

    Args:
        repo_id: Repository ID in format "owner/repo"
        search_query: Optional search query to use for fetching avatars
        custom_endpoint: Optional custom HuggingFace endpoint

    Returns:
        Avatar URL. ``None`` is returned only when ``repo_id`` is not of the
        form "owner/repo" (no "/" or an empty owner); a failed lookup yields
        the CDN fallback URL instead.
    """
    org_name, separator, _ = repo_id.partition("/")
    if not separator or not org_name:
        return None

    # dict.fromkeys de-duplicates while keeping the strategy order above.
    candidate_queries = dict.fromkeys(
        q for q in (search_query, repo_id, org_name) if q
    )
    for candidate in candidate_queries:
        avatar_map = fetch_avatar_urls_from_hf_api(candidate, custom_endpoint)
        avatar_url = avatar_map.get(org_name)
        if avatar_url:
            return avatar_url

    # Fallback to CDN URL pattern
    return f"https://cdn-thumbnails.huggingface.co/social-thumbnails/{org_name}.png"
|
nexaai/utils/decode.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for text decoding operations.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def safe_decode(data):
    """
    Safely decode bytes or text, handling UTF-8 errors.

    Args:
        data: Input data. ``bytes``, ``bytearray`` and ``memoryview`` values
            are decoded as UTF-8; anything else is converted with ``str()``.

    Returns:
        str: Decoded string with errors replaced if any
    """
    # bytearray/memoryview would otherwise fall through to str() and come
    # back as a repr such as "bytearray(b'...')" instead of the text.
    if isinstance(data, (bytes, bytearray, memoryview)):
        return bytes(data).decode('utf-8', errors='replace')
    return str(data)
|