nexaai 1.0.29__cp310-cp310-macosx_14_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexaai/__init__.py +99 -0
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +4 -0
- nexaai/asr.py +68 -0
- nexaai/asr_impl/__init__.py +0 -0
- nexaai/asr_impl/mlx_asr_impl.py +93 -0
- nexaai/asr_impl/pybind_asr_impl.py +127 -0
- nexaai/base.py +39 -0
- nexaai/binds/__init__.py +7 -0
- nexaai/binds/asr_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/cpu_gpu/libggml-base.dylib +0 -0
- nexaai/binds/cpu_gpu/libggml-cpu.so +0 -0
- nexaai/binds/cpu_gpu/libggml-metal.so +0 -0
- nexaai/binds/cpu_gpu/libggml.dylib +0 -0
- nexaai/binds/cpu_gpu/libmtmd.dylib +0 -0
- nexaai/binds/cpu_gpu/libnexa_cpu_gpu.dylib +0 -0
- nexaai/binds/cpu_gpu/libnexa_plugin.dylib +0 -0
- nexaai/binds/cv_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/diarize_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/metal/libnexa_plugin.dylib +0 -0
- nexaai/binds/metal/py-lib/ml.py +888 -0
- nexaai/binds/metal/py-lib/mlx_audio/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/binds/metal/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/binds/metal/py-lib/mlx_audio/server.py +525 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/binds/metal/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/generate.py +174 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/binds/metal/py-lib/mlx_audio/stt/utils.py +195 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/audio_player.py +120 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/convert.py +71 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/generate.py +449 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/base.py +84 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/binds/metal/py-lib/mlx_audio/tts/utils.py +337 -0
- nexaai/binds/metal/py-lib/mlx_audio/utils.py +237 -0
- nexaai/binds/metal/py-lib/mlx_audio/version.py +1 -0
- nexaai/binds/metal/py-lib/profiling.py +239 -0
- nexaai/binds/nexaml/libfftw3.3.dylib +0 -0
- nexaai/binds/nexaml/libfftw3f.3.dylib +0 -0
- nexaai/binds/nexaml/libggml-base.dylib +0 -0
- nexaai/binds/nexaml/libggml-cpu.so +0 -0
- nexaai/binds/nexaml/libggml-metal.so +0 -0
- nexaai/binds/nexaml/libggml.dylib +0 -0
- nexaai/binds/nexaml/libmp3lame.0.dylib +0 -0
- nexaai/binds/nexaml/libmpg123.0.dylib +0 -0
- nexaai/binds/nexaml/libnexa-mm-process.dylib +0 -0
- nexaai/binds/nexaml/libnexa-sampling.dylib +0 -0
- nexaai/binds/nexaml/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexaml/libnexaproc.dylib +0 -0
- nexaai/binds/nexaml/libomp.dylib +0 -0
- nexaai/binds/nexaml/libqwen3-vl.dylib +0 -0
- nexaai/binds/nexaml/libqwen3vl-vision.dylib +0 -0
- nexaai/binds/rerank_bind.cpython-310-darwin.so +0 -0
- nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
- nexaai/common.py +106 -0
- nexaai/cv.py +95 -0
- nexaai/cv_impl/__init__.py +0 -0
- nexaai/cv_impl/mlx_cv_impl.py +91 -0
- nexaai/cv_impl/pybind_cv_impl.py +124 -0
- nexaai/diarize.py +80 -0
- nexaai/diarize_impl/__init__.py +1 -0
- nexaai/diarize_impl/pybind_diarize_impl.py +125 -0
- nexaai/embedder.py +73 -0
- nexaai/embedder_impl/__init__.py +0 -0
- nexaai/embedder_impl/mlx_embedder_impl.py +118 -0
- nexaai/embedder_impl/pybind_embedder_impl.py +96 -0
- nexaai/image_gen.py +141 -0
- nexaai/image_gen_impl/__init__.py +0 -0
- nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
- nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
- nexaai/llm.py +98 -0
- nexaai/llm_impl/__init__.py +0 -0
- nexaai/llm_impl/mlx_llm_impl.py +271 -0
- nexaai/llm_impl/pybind_llm_impl.py +238 -0
- nexaai/log.py +92 -0
- nexaai/mlx_backend/asr/__init__.py +12 -0
- nexaai/mlx_backend/asr/interface.py +122 -0
- nexaai/mlx_backend/common/__init__.py +0 -0
- nexaai/mlx_backend/common/utils.py +25 -0
- nexaai/mlx_backend/cv/__init__.py +0 -0
- nexaai/mlx_backend/cv/generate.py +195 -0
- nexaai/mlx_backend/cv/interface.py +162 -0
- nexaai/mlx_backend/cv/main.py +81 -0
- nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
- nexaai/mlx_backend/embedding/__init__.py +0 -0
- nexaai/mlx_backend/embedding/generate.py +333 -0
- nexaai/mlx_backend/embedding/interface.py +617 -0
- nexaai/mlx_backend/embedding/main.py +173 -0
- nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
- nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
- nexaai/mlx_backend/image_gen/__init__.py +1 -0
- nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
- nexaai/mlx_backend/image_gen/interface.py +82 -0
- nexaai/mlx_backend/image_gen/main.py +281 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
- nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
- nexaai/mlx_backend/llm/__init__.py +0 -0
- nexaai/mlx_backend/llm/generate.py +149 -0
- nexaai/mlx_backend/llm/interface.py +764 -0
- nexaai/mlx_backend/llm/main.py +68 -0
- nexaai/mlx_backend/ml.py +888 -0
- nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
- nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
- nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
- nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
- nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
- nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
- nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
- nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
- nexaai/mlx_backend/mlx_audio/server.py +525 -0
- nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
- nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
- nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
- nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
- nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
- nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
- nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
- nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
- nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
- nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
- nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
- nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
- nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
- nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
- nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
- nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
- nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
- nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
- nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
- nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
- nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
- nexaai/mlx_backend/mlx_audio/utils.py +237 -0
- nexaai/mlx_backend/mlx_audio/version.py +1 -0
- nexaai/mlx_backend/profiling.py +239 -0
- nexaai/mlx_backend/rerank/__init__.py +0 -0
- nexaai/mlx_backend/rerank/generate.py +174 -0
- nexaai/mlx_backend/rerank/interface.py +287 -0
- nexaai/mlx_backend/rerank/main.py +127 -0
- nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
- nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
- nexaai/mlx_backend/sd/__init__.py +1 -0
- nexaai/mlx_backend/sd/interface.py +362 -0
- nexaai/mlx_backend/sd/main.py +286 -0
- nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
- nexaai/mlx_backend/sd/modeling/clip.py +116 -0
- nexaai/mlx_backend/sd/modeling/config.py +65 -0
- nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
- nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
- nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
- nexaai/mlx_backend/sd/modeling/unet.py +460 -0
- nexaai/mlx_backend/sd/modeling/vae.py +274 -0
- nexaai/mlx_backend/tts/__init__.py +12 -0
- nexaai/mlx_backend/tts/interface.py +276 -0
- nexaai/mlx_backend/vlm/__init__.py +3 -0
- nexaai/mlx_backend/vlm/generate.py +572 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl.py +374 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +259 -0
- nexaai/mlx_backend/vlm/interface.py +559 -0
- nexaai/mlx_backend/vlm/main.py +365 -0
- nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
- nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
- nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
- nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
- nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
- nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
- nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
- nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
- nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
- nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
- nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
- nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
- nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
- nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
- nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
- nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
- nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
- nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1262 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1308 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
- nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
- nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
- nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
- nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
- nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
- nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
- nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
- nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
- nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
- nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
- nexaai/rerank.py +57 -0
- nexaai/rerank_impl/__init__.py +0 -0
- nexaai/rerank_impl/mlx_rerank_impl.py +94 -0
- nexaai/rerank_impl/pybind_rerank_impl.py +136 -0
- nexaai/runtime.py +68 -0
- nexaai/runtime_error.py +24 -0
- nexaai/tts.py +75 -0
- nexaai/tts_impl/__init__.py +0 -0
- nexaai/tts_impl/mlx_tts_impl.py +94 -0
- nexaai/tts_impl/pybind_tts_impl.py +43 -0
- nexaai/utils/decode.py +18 -0
- nexaai/utils/manifest_utils.py +531 -0
- nexaai/utils/model_manager.py +1745 -0
- nexaai/utils/model_types.py +49 -0
- nexaai/utils/progress_tracker.py +389 -0
- nexaai/utils/quantization_utils.py +245 -0
- nexaai/vlm.py +130 -0
- nexaai/vlm_impl/__init__.py +0 -0
- nexaai/vlm_impl/mlx_vlm_impl.py +259 -0
- nexaai/vlm_impl/pybind_vlm_impl.py +275 -0
- nexaai-1.0.29.dist-info/METADATA +35 -0
- nexaai-1.0.29.dist-info/RECORD +580 -0
- nexaai-1.0.29.dist-info/WHEEL +5 -0
- nexaai-1.0.29.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Union
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID, ModelConfig
|
|
5
|
+
from nexaai.cv import CVModel, CVModelConfig, CVResults, CVResult, BoundingBox, CVCapabilities
|
|
6
|
+
from nexaai.binds import cv_bind, common_bind
|
|
7
|
+
from nexaai.runtime import _ensure_runtime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PyBindCVImpl(CVModel):
    """Computer-vision model implementation backed by the native ``cv_bind`` layer."""

    def __init__(self, handle: Any, m_cfg: ModelConfig = ModelConfig()):
        """Private constructor, should not be called directly.

        Args:
            handle: Opaque py::capsule returned by ``cv_bind.ml_cv_create``.
            m_cfg: Model configuration (kept for signature symmetry).
        """
        super().__init__()
        self._handle = handle  # This is a py::capsule
        self._model_config = None

    @classmethod
    def _load_from(cls,
                   local_path: str,  # This is the local path after auto_download_model processing
                   model_name: Optional[str] = None,
                   m_cfg: CVModelConfig = CVModelConfig(CVCapabilities.OCR),
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None,
                   **kwargs
                   ) -> 'PyBindCVImpl':
        """Load CV model from configuration using PyBind backend.

        Args:
            local_path: Local model path (after download/resolution).
            model_name: Optional model name; falls back to local_path.
            m_cfg: CV-specific configuration (capabilities and stage paths).
            plugin_id: Backend plugin selector, enum or plain string.
            device_id: Optional device selector string.

        Returns:
            A loaded PyBindCVImpl instance.
        """
        _ensure_runtime()

        config = cv_bind.CVModelConfig()
        config.capabilities = cv_bind.CVCapabilities(m_cfg.capabilities)

        # For each per-stage path the caller did not set explicitly,
        # fall back to the resolved local model path.
        config.det_model_path = (
            m_cfg.det_model_path if m_cfg.det_model_path is not None else local_path
        )
        config.rec_model_path = (
            m_cfg.rec_model_path if m_cfg.rec_model_path is not None else local_path
        )

        if m_cfg.char_dict_path is not None:
            config.char_dict_path = m_cfg.char_dict_path

        if m_cfg.model_path is not None:
            config.model_path = m_cfg.model_path

        if m_cfg.system_library_path is not None:
            config.system_library_path = m_cfg.system_library_path

        # The native binding expects the plugin id as a plain string.
        plugin_id_str = plugin_id.value if isinstance(
            plugin_id, PluginID) else str(plugin_id)

        model_name_to_use = model_name if model_name else local_path
        handle = cv_bind.ml_cv_create(
            model_name=model_name_to_use,
            config=config,
            plugin_id=plugin_id_str,
            device_id=device_id,
            license_id=None,
            license_key=None
        )

        return cls(handle, m_cfg)

    def eject(self):
        """Release the model from memory."""
        # py::capsule handles cleanup automatically
        if hasattr(self, '_handle') and self._handle is not None:
            del self._handle
            self._handle = None

    def infer(self, input_image_path: str) -> CVResults:
        """Perform inference on a single image.

        Args:
            input_image_path: Path to the image file on disk.

        Returns:
            CVResults containing one CVResult per detection/recognition hit.

        Raises:
            RuntimeError: If the model is not loaded or inference fails.
            FileNotFoundError: If the input image does not exist.
        """
        if self._handle is None:
            raise RuntimeError("CV model not loaded. Call _load_from first.")

        if not os.path.exists(input_image_path):
            raise FileNotFoundError(
                f"Input image not found: {input_image_path}")

        try:
            # Perform inference using the binding
            result_dict = cv_bind.ml_cv_infer(
                handle=self._handle,
                input_image_path=input_image_path
            )

            # Convert result dictionary to CVResults
            results = []
            for result_data in result_dict["results"]:
                # Create bounding box if present
                bbox = None
                bbox_data = result_data.get("bbox")
                if bbox_data is not None:
                    bbox = BoundingBox(
                        x=bbox_data["x"],
                        y=bbox_data["y"],
                        width=bbox_data["width"],
                        height=bbox_data["height"]
                    )

                # Create CV result
                cv_result = CVResult(
                    image_paths=result_data.get("image_paths"),
                    image_count=result_data.get("image_count", 0),
                    class_id=result_data.get("class_id", 0),
                    confidence=result_data.get("confidence", 0.0),
                    bbox=bbox,
                    text=result_data.get("text"),
                    embedding=result_data.get("embedding"),
                    embedding_dim=result_data.get("embedding_dim", 0)
                )
                results.append(cv_result)

            return CVResults(
                results=results,
                result_count=result_dict["result_count"]
            )

        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"CV inference failed: {str(e)}") from e
|
nexaai/diarize.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from typing import List, Optional, Sequence, Union
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from nexaai.base import BaseModel
|
|
6
|
+
from nexaai.common import PluginID, ModelConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class DiarizeConfig:
    """Options bounding how many speakers diarization may assign."""

    # For both bounds, 0 means "let the backend decide".
    min_speakers: int = 0  # minimum number of speakers (0 = auto-detect)
    max_speakers: int = 0  # maximum number of speakers (0 = no limit)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class SpeechSegment:
    """One contiguous stretch of speech attributed to a single speaker."""

    start_time: float  # segment start, seconds from the beginning of the audio
    end_time: float  # segment end, seconds from the beginning of the audio
    speaker_label: str  # speaker identifier such as "SPEAKER_00"


@dataclass
class DiarizeResult:
    """Aggregate output of one diarization run."""

    segments: Sequence[SpeechSegment]  # time-ordered speech segments
    segment_count: int  # number of entries in ``segments``
    num_speakers: int  # count of distinct speakers detected
    duration: float  # total audio duration in seconds
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Diarize(BaseModel):
    """Abstract base class for speaker diarization models."""

    def __init__(self, m_cfg: ModelConfig = ModelConfig()):
        """Store the model configuration for use by subclasses."""
        self._m_cfg = m_cfg

    @classmethod
    def _load_from(
        cls,
        model_path: str,
        model_name: Optional[str] = None,
        m_cfg: ModelConfig = ModelConfig(),
        plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
        device_id: Optional[str] = None,
        **kwargs
    ) -> "Diarize":
        """Load a diarization model from a local path via the PyBind backend.

        There is currently no MLX implementation for diarization, so every
        request is routed to the PyBind implementation.
        """
        from nexaai.diarize_impl.pybind_diarize_impl import PyBindDiarizeImpl

        return PyBindDiarizeImpl._load_from(
            model_path, model_name, m_cfg, plugin_id, device_id
        )

    @abstractmethod
    def infer(
        self,
        audio_path: str,
        config: Optional[DiarizeConfig] = None,
    ) -> DiarizeResult:
        """Determine "who spoke when" in an audio recording.

        Produces time-stamped, time-ordered, non-overlapping segments, each
        carrying a speaker label.

        Args:
            audio_path: Path to audio file
            config: Optional diarization configuration

        Returns:
            DiarizeResult with segments, speaker count, and duration
        """
        pass
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from typing import Any, Optional, Union
|
|
2
|
+
|
|
3
|
+
from nexaai.common import PluginID, ModelConfig
|
|
4
|
+
from nexaai.diarize import Diarize, DiarizeConfig, DiarizeResult, SpeechSegment
|
|
5
|
+
from nexaai.binds import diarize_bind, common_bind
|
|
6
|
+
from nexaai.runtime import _ensure_runtime
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PyBindDiarizeImpl(Diarize):
    """Speaker diarization implementation backed by ``diarize_bind``."""

    def __init__(self, handle: Any, m_cfg: ModelConfig = ModelConfig()):
        """Private constructor, should not be called directly."""
        super().__init__(m_cfg)
        self._handle = handle  # This is a py::capsule
        self._model_config = None

    @classmethod
    def _load_from(
        cls,
        model_path: str,
        model_name: Optional[str] = None,
        m_cfg: ModelConfig = ModelConfig(),
        plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
        device_id: Optional[str] = None,
    ) -> "PyBindDiarizeImpl":
        """Load diarization model from local path using PyBind backend."""
        _ensure_runtime()

        # Build the native model config from the Python-side config.
        config = common_bind.ModelConfig()
        config.n_ctx = m_cfg.n_ctx

        # Optional integer knobs: copy only those the caller actually set.
        for field in ("n_threads", "n_threads_batch", "n_batch", "n_ubatch", "n_seq_max"):
            value = getattr(m_cfg, field)
            if value is not None:
                setattr(config, field, value)

        config.n_gpu_layers = m_cfg.n_gpu_layers

        # Chat template strings (if needed for diarization).
        if m_cfg.chat_template_path:
            config.chat_template_path = m_cfg.chat_template_path
        if m_cfg.chat_template_content:
            config.chat_template_content = m_cfg.chat_template_content

        # The native binding expects the plugin id as a plain string.
        backend = plugin_id.value if isinstance(plugin_id, PluginID) else str(plugin_id)

        handle = diarize_bind.ml_diarize_create(
            model_path=model_path,
            model_name=model_name,
            model_config=config,
            plugin_id=backend,
            device_id=device_id,
            license_id=None,  # Optional
            license_key=None,  # Optional
        )

        return cls(handle, m_cfg)

    def eject(self):
        """Release the model from memory."""
        # Dropping the py::capsule triggers native cleanup automatically.
        if getattr(self, "_handle", None) is not None:
            del self._handle
            self._handle = None

    def infer(
        self,
        audio_path: str,
        config: Optional[DiarizeConfig] = None,
    ) -> DiarizeResult:
        """Perform speaker diarization on an audio file.

        Determines "who spoke when", producing time-stamped, time-ordered,
        non-overlapping segments with speaker labels.

        Args:
            audio_path: Path to audio file
            config: Optional diarization configuration

        Returns:
            DiarizeResult with segments, speaker count, and duration
        """
        if self._handle is None:
            raise RuntimeError("Diarization model not loaded. Call _load_from first.")

        # Translate the optional Python config into its binding counterpart.
        native_cfg = None
        if config:
            native_cfg = diarize_bind.DiarizeConfig()
            native_cfg.min_speakers = config.min_speakers
            native_cfg.max_speakers = config.max_speakers

        result_dict = diarize_bind.ml_diarize_infer(
            handle=self._handle, audio_path=audio_path, config=native_cfg
        )

        segments = [
            SpeechSegment(
                start_time=float(seg["start_time"]),
                end_time=float(seg["end_time"]),
                speaker_label=seg["speaker_label"],
            )
            for seg in result_dict.get("segments", [])
        ]

        return DiarizeResult(
            segments=segments,
            segment_count=result_dict.get("segment_count", 0),
            num_speakers=result_dict.get("num_speakers", 0),
            duration=result_dict.get("duration", 0.0),
        )
|
nexaai/embedder.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from nexaai.base import BaseModel
|
|
7
|
+
from nexaai.common import PluginID
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class EmbeddingConfig:
    """Options controlling embedding generation."""

    batch_size: int = 32  # sequences processed per batch
    normalize: bool = True  # whether to normalize output vectors
    normalize_method: str = "l2"  # normalization scheme when enabled
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Embedder(BaseModel):
    """Abstract interface for models that map text to dense vectors."""

    def __init__(self):
        """
        Internal initializer
        """
        pass

    @classmethod
    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
        """
        Load an embedder from model files, routing to the backend matching plugin_id.

        Args:
            model_path: Path to the model file
            model_name: Name of the model
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)

        Returns:
            Embedder instance
        """
        # plugin_id may arrive as an enum or a bare string; normalize first.
        backend = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

        if backend == "mlx":
            from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl as _Impl
        else:
            from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl as _Impl
        return _Impl._load_from(model_path, model_name, tokenizer_file, plugin_id)

    @abstractmethod
    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts or input_ids.

        Args:
            texts: List of strings or single string to embed
            input_ids: Pre-tokenized input, either a single sequence
                ([1, 2, 3, 4]) or a batch of sequences ([[1, 2, 3], [4, 5, 6]])
            config: Configuration for embedding generation

        Returns:
            numpy array of embeddings with shape (num_sequences, embedding_dim)
        """
        pass

    @abstractmethod
    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int
        """
        pass
|
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID
|
|
5
|
+
from nexaai.embedder import Embedder, EmbeddingConfig
|
|
6
|
+
from nexaai.mlx_backend.embedding.interface import create_embedder
|
|
7
|
+
from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MLXEmbedderImpl(Embedder):
    """Embedder implementation backed by the MLX runtime."""

    def __init__(self):
        """Initialize MLX Embedder implementation."""
        super().__init__()
        self._mlx_embedder = None  # set by _load_from

    @classmethod
    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.MLX):
        """
        Load an embedder from model files using MLX backend.

        Args:
            model_path: Path to the model file
            model_name: Name of the model (currently unused by the MLX backend)
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.MLX)

        Returns:
            MLXEmbedderImpl instance

        Raises:
            RuntimeError: If the embedder cannot be created or the model fails to load.
        """
        try:
            # Create instance
            instance = cls()

            # Use the factory function to create the appropriate embedder based on model type
            # This will automatically detect if it's JinaV2 or generic model and route correctly
            instance._mlx_embedder = create_embedder(
                model_path=model_path,
                # model_name=model_name, # FIXME: For MLX Embedder, model_name is not used
                tokenizer_path=tokenizer_file
            )

            # Load the model
            success = instance._mlx_embedder.load_model(model_path)
            if not success:
                raise RuntimeError("Failed to load MLX embedder model")

            return instance
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Failed to load MLX Embedder: {str(e)}") from e

    def eject(self):
        """
        Clean up resources and destroy the embedder
        """
        if self._mlx_embedder:
            self._mlx_embedder.destroy()
            self._mlx_embedder = None

    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts.

        Args:
            texts: List of strings or single string to embed
            input_ids: Not supported by the MLX backend; must be None
            config: Configuration for embedding generation

        Returns:
            numpy array of embeddings with shape (num_sequences, embedding_dim)

        Raises:
            ValueError: If neither texts nor input_ids is provided.
            NotImplementedError: If input_ids is provided.
            RuntimeError: If the model is not loaded or embedding fails.
        """
        if not self._mlx_embedder:
            raise RuntimeError("MLX Embedder not loaded")

        if texts is None and input_ids is None:
            raise ValueError("Either texts or input_ids must be provided")

        # MLX embedder currently only supports text input, not pre-tokenized input_ids
        if input_ids is not None:
            raise NotImplementedError("MLX embedder does not support input_ids, only text input")

        try:
            # Convert single string to list if needed
            if isinstance(texts, str):
                texts = [texts]

            # NOTE(review): at module level, `EmbeddingConfig` imported from
            # nexaai.mlx_backend.ml shadows the one from nexaai.embedder, so
            # this constructs the MLX config class — confirm this is intended.
            mlx_config = EmbeddingConfig()
            mlx_config.batch_size = config.batch_size
            mlx_config.normalize = config.normalize
            mlx_config.normalize_method = config.normalize_method

            # Generate embeddings using MLX
            embeddings = self._mlx_embedder.embed(texts, mlx_config)

            # Convert to numpy array
            return np.array(embeddings, dtype=np.float32)

        except Exception as e:
            raise RuntimeError(f"Failed to generate embeddings: {str(e)}") from e

    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int
        """
        if not self._mlx_embedder:
            raise RuntimeError("MLX Embedder not loaded")

        try:
            return self._mlx_embedder.embedding_dim()
        except Exception as e:
            raise RuntimeError(f"Failed to get embedding dimension: {str(e)}") from e
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from typing import List, Union
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from nexaai.common import PluginID
|
|
5
|
+
from nexaai.embedder import Embedder, EmbeddingConfig
|
|
6
|
+
from nexaai.binds import embedder_bind
|
|
7
|
+
from nexaai.runtime import _ensure_runtime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PyBindEmbedderImpl(Embedder):
    """Embedder backed by the native ``embedder_bind`` PyBind layer."""

    def __init__(self, _handle_ptr):
        """
        Internal initializer
        """
        super().__init__()
        self._handle = _handle_ptr  # py::capsule owning the native embedder

    @classmethod
    def _load_from(cls, model_path: str, model_name: str = None, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
        """
        Load an embedder from model files

        Args:
            model_path: Path to the model file
            model_name: Name of the model
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP)

        Returns:
            PyBindEmbedderImpl instance
        """
        _ensure_runtime()
        # The C++ binding expects the plugin id as a plain string.
        backend = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
        # New parameter order: model_path, model_name, plugin_id, tokenizer_path.
        handle = embedder_bind.ml_embedder_create(model_path, model_name, backend, tokenizer_file)
        return cls(handle)

    def eject(self):
        """
        Clean up resources and destroy the embedder
        """
        # Destructor of the handle will unload the model correctly
        del self._handle
        self._handle = None

    def generate(self, texts: Union[List[str], str] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]]] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts or input_ids.

        Args:
            texts: List of strings or single string to embed
            input_ids: Pre-tokenized input, either a single sequence
                ([1, 2, 3, 4]) or a batch of sequences ([[1, 2, 3], [4, 5, 6]])
            config: Configuration for embedding generation

        Returns:
            numpy array of embeddings with shape (num_sequences, embedding_dim)
        """
        if texts is None and input_ids is None:
            raise ValueError("Either texts or input_ids must be provided")

        # Translate the Python config into its binding counterpart.
        bind_config = embedder_bind.EmbeddingConfig()
        bind_config.batch_size = config.batch_size
        bind_config.normalize = config.normalize
        bind_config.normalize_method = config.normalize_method

        # Accept a bare string as a one-element batch.
        batch_texts = [texts] if isinstance(texts, str) else texts

        # Normalize token input to the 2D batch shape the ABI expects.
        token_batches = None
        if input_ids is not None:
            if len(input_ids) > 0 and isinstance(input_ids[0], int):
                token_batches = [input_ids]  # single sequence -> batch of one
            else:
                token_batches = input_ids  # already a batch of sequences

        # Pass both parameters, let the ABI handle validation
        return embedder_bind.ml_embedder_embed(self._handle, bind_config, batch_texts, token_batches)

    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int
        """
        return embedder_bind.ml_embedder_embedding_dim(self._handle)
|