nexaai 1.0.16rc13__cp310-cp310-macosx_15_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic. Click here for more details.
- nexaai/__init__.py +83 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +4 -0
- nexaai/asr.py +64 -0
- nexaai/asr_impl/__init__.py +0 -0
- nexaai/asr_impl/mlx_asr_impl.py +92 -0
- nexaai/asr_impl/pybind_asr_impl.py +44 -0
- nexaai/base.py +39 -0
- nexaai/binds/__init__.py +4 -0
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/py-lib/ml.py +888 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/server.py +525 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/generate.py +174 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/utils.py +195 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/audio_player.py +120 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/convert.py +71 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/generate.py +449 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/base.py +84 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/utils.py +337 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/utils.py +237 -0
- nexaai/binds/nexa_mlx/py-lib/mlx_audio/version.py +1 -0
- nexaai/binds/nexa_mlx/py-lib/profiling.py +239 -0
- nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
- nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
- nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
- nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa-mm-process.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa-sampling.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_nexaml/libnexaproc.dylib +0 -0
- nexaai/binds/nexa_nexaml/libqwen3-vl.dylib +0 -0
- nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +104 -0
- nexaai/cv.py +92 -0
- nexaai/cv_impl/__init__.py +0 -0
- nexaai/cv_impl/mlx_cv_impl.py +89 -0
- nexaai/cv_impl/pybind_cv_impl.py +32 -0
- nexaai/embedder.py +72 -0
- nexaai/embedder_impl/__init__.py +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +116 -0
- nexaai/embedder_impl/pybind_embedder_impl.py +95 -0
- nexaai/image_gen.py +140 -0
- nexaai/image_gen_impl/__init__.py +0 -0
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
- nexaai/llm.py +96 -0
- nexaai/llm_impl/__init__.py +0 -0
- nexaai/llm_impl/mlx_llm_impl.py +269 -0
- nexaai/llm_impl/pybind_llm_impl.py +218 -0
- nexaai/log.py +92 -0
- nexaai/mlx_backend/asr/__init__.py +12 -0
- nexaai/mlx_backend/asr/interface.py +122 -0
- nexaai/mlx_backend/common/__init__.py +0 -0
- nexaai/mlx_backend/common/utils.py +25 -0
- nexaai/mlx_backend/cv/__init__.py +0 -0
- nexaai/mlx_backend/cv/generate.py +195 -0
- nexaai/mlx_backend/cv/interface.py +151 -0
- nexaai/mlx_backend/cv/main.py +81 -0
- nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
- nexaai/mlx_backend/embedding/__init__.py +0 -0
- nexaai/mlx_backend/embedding/generate.py +333 -0
- nexaai/mlx_backend/embedding/interface.py +617 -0
- nexaai/mlx_backend/embedding/main.py +173 -0
- nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
- nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
- nexaai/mlx_backend/image_gen/__init__.py +1 -0
- nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
- nexaai/mlx_backend/image_gen/interface.py +82 -0
- nexaai/mlx_backend/image_gen/main.py +281 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
- nexaai/mlx_backend/llm/__init__.py +0 -0
- nexaai/mlx_backend/llm/generate.py +149 -0
- nexaai/mlx_backend/llm/interface.py +764 -0
- nexaai/mlx_backend/llm/main.py +68 -0
- nexaai/mlx_backend/ml.py +888 -0
- nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/mlx_backend/mlx_audio/server.py +525 -0
- nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
- nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
- nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
- nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
- nexaai/mlx_backend/mlx_audio/utils.py +237 -0
- nexaai/mlx_backend/mlx_audio/version.py +1 -0
- nexaai/mlx_backend/profiling.py +239 -0
- nexaai/mlx_backend/rerank/__init__.py +0 -0
- nexaai/mlx_backend/rerank/generate.py +174 -0
- nexaai/mlx_backend/rerank/interface.py +287 -0
- nexaai/mlx_backend/rerank/main.py +127 -0
- nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
- nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
- nexaai/mlx_backend/sd/__init__.py +1 -0
- nexaai/mlx_backend/sd/interface.py +362 -0
- nexaai/mlx_backend/sd/main.py +286 -0
- nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
- nexaai/mlx_backend/sd/modeling/clip.py +116 -0
- nexaai/mlx_backend/sd/modeling/config.py +65 -0
- nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
- nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
- nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
- nexaai/mlx_backend/sd/modeling/unet.py +460 -0
- nexaai/mlx_backend/sd/modeling/vae.py +274 -0
- nexaai/mlx_backend/tts/__init__.py +12 -0
- nexaai/mlx_backend/tts/interface.py +276 -0
- nexaai/mlx_backend/vlm/__init__.py +3 -0
- nexaai/mlx_backend/vlm/generate.py +572 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py +261 -0
- nexaai/mlx_backend/vlm/interface.py +415 -0
- nexaai/mlx_backend/vlm/main.py +316 -0
- nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
- nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
- nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
- nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
- nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
- nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
- nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
- nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
- nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
- nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
- nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
- nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
- nexaai/rerank.py +55 -0
- nexaai/rerank_impl/__init__.py +0 -0
- nexaai/rerank_impl/mlx_rerank_impl.py +92 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +43 -0
- nexaai/runtime.py +68 -0
- nexaai/tts.py +74 -0
- nexaai/tts_impl/__init__.py +0 -0
- nexaai/tts_impl/mlx_tts_impl.py +94 -0
- nexaai/tts_impl/pybind_tts_impl.py +43 -0
- nexaai/utils/avatar_fetcher.py +104 -0
- nexaai/utils/decode.py +18 -0
- nexaai/utils/manifest_utils.py +324 -0
- nexaai/utils/model_manager.py +1353 -0
- nexaai/utils/model_types.py +47 -0
- nexaai/utils/progress_tracker.py +385 -0
- nexaai/utils/quantization_utils.py +245 -0
- nexaai/vlm.py +128 -0
- nexaai/vlm_impl/__init__.py +0 -0
- nexaai/vlm_impl/mlx_vlm_impl.py +258 -0
- nexaai/vlm_impl/pybind_vlm_impl.py +230 -0
- nexaai-1.0.16rc13.dist-info/METADATA +32 -0
- nexaai-1.0.16rc13.dist-info/RECORD +557 -0
- nexaai-1.0.16rc13.dist-info/WHEEL +5 -0
- nexaai-1.0.16rc13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID
|
|
5
|
+
from nexaai.embedder import Embedder, EmbeddingConfig
|
|
6
|
+
from nexaai.mlx_backend.embedding.interface import create_embedder
|
|
7
|
+
from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MLXEmbedderImpl(Embedder):
    """Embedder that forwards every call to an MLX embedder object.

    The wrapped object is whatever ``create_embedder`` returns; this class
    only validates arguments, copies config fields across and converts the
    result to a NumPy array.

    NOTE(review): this module imports the name ``EmbeddingConfig`` twice
    (from ``nexaai.embedder`` and then from ``nexaai.mlx_backend.ml``); the
    later import is the one in effect wherever ``EmbeddingConfig`` is used
    below. Confirm that this is intended.
    """

    def __init__(self):
        """Initialize MLX Embedder implementation."""
        super().__init__()
        # Populated by ``_load_from``; ``None`` means not loaded or ejected.
        self._mlx_embedder = None

    @classmethod
    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
        """
        Load an embedder from model files using MLX backend.

        Args:
            model_path: Path to the model file
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.MLX).
                Accepted for signature parity; it is not read in this method.

        Returns:
            MLXEmbedderImpl instance

        Raises:
            RuntimeError: If the embedder cannot be created or ``load_model``
                reports failure. The original exception is attached as the cause.
        """
        try:
            instance = cls()

            # The factory chooses the concrete embedder for this model.
            instance._mlx_embedder = create_embedder(
                model_path=model_path,
                tokenizer_path=tokenizer_file
            )

            success = instance._mlx_embedder.load_model(model_path)
            if not success:
                raise RuntimeError("Failed to load MLX embedder model")

            return instance
        except Exception as e:
            # Chain explicitly so the traceback shows the root cause as the direct cause.
            raise RuntimeError(f"Failed to load MLX Embedder: {str(e)}") from e

    def eject(self):
        """
        Clean up resources and destroy the embedder
        """
        if self._mlx_embedder:
            self._mlx_embedder.destroy()
            self._mlx_embedder = None

    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts.

        Args:
            texts: List of strings or single string to embed
            config: Configuration for embedding generation; only ``batch_size``,
                ``normalize`` and ``normalize_method`` are read
            input_ids: Not supported by this backend; passing any value raises

        Returns:
            float32 numpy array built from the value returned by the MLX embedder

        Raises:
            RuntimeError: If no embedder is loaded, or if embedding fails
            ValueError: If both ``texts`` and ``input_ids`` are None
            NotImplementedError: If ``input_ids`` is provided
        """
        if not self._mlx_embedder:
            raise RuntimeError("MLX Embedder not loaded")

        if texts is None and input_ids is None:
            raise ValueError("Either texts or input_ids must be provided")

        # MLX embedder currently only supports text input, not pre-tokenized input_ids
        if input_ids is not None:
            raise NotImplementedError("MLX embedder does not support input_ids, only text input")

        try:
            # A single string is treated as a batch of one.
            if isinstance(texts, str):
                texts = [texts]

            # Copy the caller's settings onto a fresh config object for the MLX call.
            mlx_config = EmbeddingConfig()
            mlx_config.batch_size = config.batch_size
            mlx_config.normalize = config.normalize
            mlx_config.normalize_method = config.normalize_method

            embeddings = self._mlx_embedder.embed(texts, mlx_config)

            return np.array(embeddings, dtype=np.float32)
        except Exception as e:
            raise RuntimeError(f"Failed to generate embeddings: {str(e)}") from e

    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int

        Raises:
            RuntimeError: If no embedder is loaded or the lookup fails
        """
        if not self._mlx_embedder:
            raise RuntimeError("MLX Embedder not loaded")

        try:
            return self._mlx_embedder.embedding_dim()
        except Exception as e:
            raise RuntimeError(f"Failed to get embedding dimension: {str(e)}") from e
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID
|
|
5
|
+
from nexaai.embedder import Embedder, EmbeddingConfig
|
|
6
|
+
from nexaai.binds import embedder_bind
|
|
7
|
+
from nexaai.runtime import _ensure_runtime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PyBindEmbedderImpl(Embedder):
    """Embedder that forwards to the native ``embedder_bind`` extension module."""

    def __init__(self, _handle_ptr):
        """
        Internal initializer

        Args:
            _handle_ptr: Handle returned by ``embedder_bind.ml_embedder_create``
        """
        super().__init__()
        self._handle = _handle_ptr

    @classmethod
    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
        """
        Load an embedder from model files

        Args:
            model_path: Path to the model file
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)

        Returns:
            PyBindEmbedderImpl instance
        """
        _ensure_runtime()
        # The binding takes the plugin id as a plain string.
        plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
        # Argument order expected by the binding: model_path, plugin_id, tokenizer_path
        handle = embedder_bind.ml_embedder_create(model_path, plugin_id_str, tokenizer_file)
        return cls(handle)

    def eject(self):
        """
        Clean up resources and destroy the embedder
        """
        # Dropping the reference is sufficient; per the original comment the
        # handle's destructor unloads the model.
        self._handle = None

    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts or input_ids.

        Args:
            texts: List of strings or single string to embed
            config: Configuration for embedding generation; only ``batch_size``,
                ``normalize`` and ``normalize_method`` are read
            input_ids: Pre-tokenized input as:
                - Single sequence: list of integers [1, 2, 3, 4]
                - Multiple sequences: list of lists [[1, 2, 3], [4, 5, 6]]

        Returns:
            Whatever ``embedder_bind.ml_embedder_embed`` returns (annotated as a
            numpy array of shape (num_sequences, embedding_dim))

        Raises:
            RuntimeError: If the embedder has been ejected
            ValueError: If both ``texts`` and ``input_ids`` are None
        """
        # Never hand a missing handle to native code.
        if self._handle is None:
            raise RuntimeError("Embedder not loaded")

        if texts is None and input_ids is None:
            raise ValueError("Either texts or input_ids must be provided")

        bind_config = embedder_bind.EmbeddingConfig()
        bind_config.batch_size = config.batch_size
        bind_config.normalize = config.normalize
        bind_config.normalize_method = config.normalize_method

        # A single string is treated as a batch of one.
        if isinstance(texts, str):
            texts = [texts]

        processed_input_ids = None
        if input_ids is not None:
            from numbers import Integral

            # ``Integral`` also matches NumPy integer scalars, which are not
            # ``int`` instances; a flat sequence of them is still one sequence.
            if len(input_ids) > 0 and isinstance(input_ids[0], Integral):
                # Single sequence: wrap [1, 2, 3] as [[1, 2, 3]]
                processed_input_ids = [input_ids]
            else:
                # Multiple sequences: already [[1, 2], [3, 4]]
                processed_input_ids = input_ids

        # Both inputs are forwarded; further validation is left to the binding.
        embeddings = embedder_bind.ml_embedder_embed(self._handle, bind_config, texts, processed_input_ids)

        return embeddings

    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int

        Raises:
            RuntimeError: If the embedder has been ejected
        """
        if self._handle is None:
            raise RuntimeError("Embedder not loaded")
        return embedder_bind.ml_embedder_embedding_dim(self._handle)
|
nexaai/image_gen.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Image:
    """Raw image buffer together with its dimensions.

    Attributes:
        data: Flat list of floats; the original comment gives its size as
            width x height x channels.
        width: Image width.
        height: Image height.
        channels: Channel count (3 = RGB, 4 = RGBA).
    """

    data: List[float]
    width: int
    height: int
    channels: int
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ImageSamplerConfig:
    """Sampler settings for image generation.

    Attributes:
        method: Sampler name (default "ddim").
        steps: Number of sampling steps.
        guidance_scale: Guidance scale value.
        eta: Eta value passed to the sampler.
        seed: Random seed; -1 means random.
    """

    method: str = "ddim"
    steps: int = 20
    guidance_scale: float = 7.5
    eta: float = 0.0
    seed: int = -1
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class ImageGenerationConfig:
    """Request parameters for one image generation call.

    Attributes:
        prompts: One prompt or a list of prompts (required).
        negative_prompts: Optional negative prompt(s).
        height: Output height (default 512).
        width: Output width (default 512).
        sampler_config: Optional sampler settings.
        lora_id: LoRA adapter id; -1 means none.
        init_image: Optional starting image.
        strength: Strength value (default 1.0).
        n_images: Number of images (default 1).
        n_rows: Number of rows (default 1).
        decoding_batch_size: Decoding batch size (default 1).
    """

    prompts: Union[str, List[str]]
    negative_prompts: Optional[Union[str, List[str]]] = None
    height: int = 512
    width: int = 512
    sampler_config: Optional[ImageSamplerConfig] = None
    lora_id: int = -1
    init_image: Optional[Image] = None
    strength: float = 1.0
    n_images: int = 1
    n_rows: int = 1
    decoding_batch_size: int = 1
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class SchedulerConfig:
    """Settings for the diffusion scheduler.

    Attributes:
        type: Scheduler name (default "ddim").
        num_train_timesteps: Number of training timesteps.
        steps_offset: Offset added to the inference steps.
        beta_start: Starting beta value.
        beta_end: Final beta value.
        beta_schedule: Name of the beta schedule.
        prediction_type: Name of the prediction type.
        timestep_type: Name of the timestep type.
        timestep_spacing: Name of the timestep spacing scheme.
        interpolation_type: Name of the interpolation type.
        config_path: Optional path to a scheduler config.
    """

    type: str = "ddim"
    num_train_timesteps: int = 1000
    steps_offset: int = 0
    beta_start: float = 0.00085
    beta_end: float = 0.012
    beta_schedule: str = "scaled_linear"
    prediction_type: str = "epsilon"
    timestep_type: str = "discrete"
    timestep_spacing: str = "linspace"
    interpolation_type: str = "linear"
    config_path: Optional[str] = None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ImageGen(BaseModel):
    """Interface implemented by every image generation backend."""

    def __init__(self):
        """Initialize base image generation class."""

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   scheduler_config_path: str = "",
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None,
                   float16: bool = True,
                   quantize: bool = False
                   ) -> 'ImageGen':
        """Load a model from a local path via the implementation matching ``plugin_id``.

        A plugin id equal to "mlx" (as an enum value or a string) selects the
        MLX implementation; anything else selects the pybind implementation.
        All arguments are forwarded unchanged to that implementation's
        ``_load_from``.
        """
        # Accept either the enum or its string value.
        backend = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

        # Implementations are imported lazily so only the selected backend is loaded.
        if backend == "mlx":
            from nexaai.image_gen_impl.mlx_image_gen_impl import MLXImageGenImpl as impl_cls
        else:
            from nexaai.image_gen_impl.pybind_image_gen_impl import PyBindImageGenImpl as impl_cls

        return impl_cls._load_from(model_path, scheduler_config_path, plugin_id, device_id, float16, quantize)

    @abstractmethod
    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load model from path."""

    @abstractmethod
    def set_scheduler(self, config: SchedulerConfig) -> None:
        """Set scheduler configuration."""

    @abstractmethod
    def set_sampler(self, config: ImageSamplerConfig) -> None:
        """Set sampler configuration."""

    @abstractmethod
    def reset_sampler(self) -> None:
        """Reset sampler to default configuration."""

    @abstractmethod
    def txt2img(self, prompt: str, config: ImageGenerationConfig) -> Image:
        """Generate image from text prompt."""

    @abstractmethod
    def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig) -> Image:
        """Generate image from initial image and text prompt."""

    @abstractmethod
    def generate(self, config: ImageGenerationConfig) -> Image:
        """Generate image from configuration."""

    @abstractmethod
    def set_lora(self, lora_id: int) -> None:
        """Set active LoRA adapter."""

    @abstractmethod
    def add_lora(self, lora_path: str) -> int:
        """Add LoRA adapter and return its ID."""

    @abstractmethod
    def remove_lora(self, lora_id: int) -> None:
        """Remove LoRA adapter."""

    @abstractmethod
    def list_loras(self) -> List[int]:
        """List available LoRA adapters."""
|
|
File without changes
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Note: This code is generated by Cursor, not tested yet.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from nexaai.common import PluginID
|
|
7
|
+
from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
|
|
8
|
+
from nexaai.mlx_backend.sd.interface import ImageGen as MLXImageGenInterface
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MLXImageGenImpl(ImageGen):
    """Image generation backend that delegates to the MLX image generation interface.

    Config and image records are copied into the types exported by
    ``sd.interface``, which is imported from the py-lib directory located by
    ``_get_mlx_path``.

    NOTE(review): the wrapped object comes from
    ``nexaai.mlx_backend.sd.interface`` while the config types below come from
    ``sd.interface`` via ``sys.path`` — confirm these are meant to be the same
    module.
    """

    def __init__(self):
        """Initialize MLX Image Generation implementation."""
        super().__init__()
        # Populated by ``_load_from``; ``None`` means not loaded or ejected.
        self._mlx_image_gen = None
        # Last config given to ``set_scheduler``; defined here so the
        # attribute exists before that method is first called.
        self._scheduler_config = None

    @classmethod
    def _load_from(cls,
                   model_path: str,
                   scheduler_config_path: str = "",
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None,
                   float16: bool = True,
                   quantize: bool = False
                   ) -> 'MLXImageGenImpl':
        """Load image generation model from local path using MLX backend.

        ``plugin_id`` is accepted for signature parity and is not read here.

        Raises:
            RuntimeError: If the MLX interface cannot be constructed.
        """
        try:
            instance = cls()
            instance._mlx_image_gen = MLXImageGenInterface(
                model_path=model_path,
                scheduler_config_path=scheduler_config_path,
                device=device_id,
                float16=float16,
                quantize=quantize
            )
            return instance
        except Exception as e:
            raise RuntimeError(f"Failed to load MLX Image Generation: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources."""
        if self._mlx_image_gen:
            self._mlx_image_gen.destroy()
            self._mlx_image_gen = None

    def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
        """Load model from path.

        Raises:
            RuntimeError: If the generator is not initialized or loading fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not initialized")

        try:
            return self._mlx_image_gen.load_model(model_path, extra_data)
        except Exception as e:
            raise RuntimeError(f"Failed to load image generation model: {str(e)}") from e

    def set_scheduler(self, config: SchedulerConfig) -> None:
        """Set scheduler configuration.

        The config is stored on this instance and also passed through to the
        MLX interface unchanged.

        Raises:
            RuntimeError: If the generator is not loaded or the call fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            # Kept from the original: make py-lib importable before delegating.
            # NOTE(review): nothing is imported from it in this method —
            # confirm whether the MLX interface depends on this.
            self._ensure_mlx_on_path()

            self._scheduler_config = config
            self._mlx_image_gen.set_scheduler(config)
        except Exception as e:
            raise RuntimeError(f"Failed to set scheduler: {str(e)}") from e

    def set_sampler(self, config: ImageSamplerConfig) -> None:
        """Set sampler configuration.

        Raises:
            RuntimeError: If the generator is not loaded or the call fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            self._ensure_mlx_on_path()
            from sd.interface import ImageSamplerConfig as MLXImageSamplerConfig

            mlx_config = MLXImageSamplerConfig(
                method=config.method,
                steps=config.steps,
                guidance_scale=config.guidance_scale,
                eta=config.eta,
                seed=config.seed
            )

            self._mlx_image_gen.set_sampler(mlx_config)
        except Exception as e:
            raise RuntimeError(f"Failed to set sampler: {str(e)}") from e

    def reset_sampler(self) -> None:
        """Reset sampler to default configuration."""
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            self._mlx_image_gen.reset_sampler()
        except Exception as e:
            raise RuntimeError(f"Failed to reset sampler: {str(e)}") from e

    def txt2img(self, prompt: str, config: ImageGenerationConfig) -> Image:
        """Generate image from text prompt.

        Raises:
            RuntimeError: If the generator is not loaded or generation fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            # The converter sets up the import path and imports the MLX types itself.
            mlx_config = self._convert_generation_config(config)
            result = self._mlx_image_gen.txt2img(prompt, mlx_config)
            return self._convert_image_result(result)
        except Exception as e:
            raise RuntimeError(f"Failed to generate image from text: {str(e)}") from e

    def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig) -> Image:
        """Generate image from initial image and text prompt.

        Raises:
            RuntimeError: If the generator is not loaded or generation fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            mlx_config = self._convert_generation_config(config)
            mlx_init_image = self._to_mlx_image(init_image)
            result = self._mlx_image_gen.img2img(mlx_init_image, prompt, mlx_config)
            return self._convert_image_result(result)
        except Exception as e:
            raise RuntimeError(f"Failed to generate image from image: {str(e)}") from e

    def generate(self, config: ImageGenerationConfig) -> Image:
        """Generate image from configuration.

        Raises:
            RuntimeError: If the generator is not loaded or generation fails.
        """
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            mlx_config = self._convert_generation_config(config)
            result = self._mlx_image_gen.generate(mlx_config)
            return self._convert_image_result(result)
        except Exception as e:
            raise RuntimeError(f"Failed to generate image: {str(e)}") from e

    def set_lora(self, lora_id: int) -> None:
        """Set active LoRA adapter."""
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            self._mlx_image_gen.set_lora(lora_id)
        except Exception as e:
            raise RuntimeError(f"Failed to set LoRA: {str(e)}") from e

    def add_lora(self, lora_path: str) -> int:
        """Add LoRA adapter and return its ID."""
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            return self._mlx_image_gen.add_lora(lora_path)
        except Exception as e:
            raise RuntimeError(f"Failed to add LoRA: {str(e)}") from e

    def remove_lora(self, lora_id: int) -> None:
        """Remove LoRA adapter."""
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            self._mlx_image_gen.remove_lora(lora_id)
        except Exception as e:
            raise RuntimeError(f"Failed to remove LoRA: {str(e)}") from e

    def list_loras(self) -> List[int]:
        """List available LoRA adapters."""
        if not self._mlx_image_gen:
            raise RuntimeError("MLX Image Generator not loaded")

        try:
            return self._mlx_image_gen.list_loras()
        except Exception as e:
            raise RuntimeError(f"Failed to list LoRAs: {str(e)}") from e

    def _convert_generation_config(self, config: ImageGenerationConfig):
        """Convert our config to MLX format.

        Nested sampler config and init image are converted only when set;
        otherwise ``None`` is passed through.
        """
        self._ensure_mlx_on_path()
        from sd.interface import ImageGenerationConfig as MLXImageGenerationConfig, ImageSamplerConfig as MLXImageSamplerConfig

        mlx_sampler_config = None
        if config.sampler_config:
            mlx_sampler_config = MLXImageSamplerConfig(
                method=config.sampler_config.method,
                steps=config.sampler_config.steps,
                guidance_scale=config.sampler_config.guidance_scale,
                eta=config.sampler_config.eta,
                seed=config.sampler_config.seed
            )

        mlx_init_image = None
        if config.init_image:
            mlx_init_image = self._to_mlx_image(config.init_image)

        return MLXImageGenerationConfig(
            prompts=config.prompts,
            negative_prompts=config.negative_prompts,
            height=config.height,
            width=config.width,
            sampler_config=mlx_sampler_config,
            lora_id=config.lora_id,
            init_image=mlx_init_image,
            strength=config.strength,
            n_images=config.n_images,
            n_rows=config.n_rows,
            decoding_batch_size=config.decoding_batch_size
        )

    def _to_mlx_image(self, image: Image):
        """Copy one of our ``Image`` records into the MLX interface's ``Image`` type."""
        self._ensure_mlx_on_path()
        from sd.interface import Image as MLXImage

        return MLXImage(
            data=image.data,
            width=image.width,
            height=image.height,
            channels=image.channels
        )

    def _convert_image_result(self, mlx_image):
        """Convert MLX image to our format."""
        return Image(
            data=mlx_image.data,
            width=mlx_image.width,
            height=mlx_image.height,
            channels=mlx_image.channels
        )

    @classmethod
    def _ensure_mlx_on_path(cls) -> None:
        """Put the MLX py-lib directory at the front of ``sys.path`` if it is not already listed.

        Raises:
            ImportError: Propagated from ``_get_mlx_path`` when no directory is found.
        """
        import sys

        mlx_path = cls._get_mlx_path()
        if mlx_path not in sys.path:
            sys.path.insert(0, mlx_path)

    @staticmethod
    def _get_mlx_path():
        """Get the MLX py-lib path.

        Candidates are tried in order: ``plugins/mlx/py-lib`` under the
        directory three levels above the one containing this file (the
        original comment calls it the nexa root), then the ``NEXA_MLX_PATH``
        environment variable when it is set and non-empty. A candidate is
        accepted only if it contains ``sd/interface.py``.

        Returns:
            Absolute path of the first matching directory, as a string.

        Raises:
            ImportError: If no candidate matches.
        """
        from pathlib import Path

        current_dir = Path(__file__).parent.parent.parent.parent
        candidates = [current_dir / "plugins" / "mlx" / "py-lib"]

        env_path = os.environ.get("NEXA_MLX_PATH")
        if env_path:
            candidates.append(Path(env_path))

        for path in candidates:
            if path.exists() and (path / "sd" / "interface.py").exists():
                return str(path.absolute())

        raise ImportError("Could not find MLX py-lib directory")
|