nexaai 1.0.18rc1__cp310-cp310-macosx_14_0_universal2.whl → 1.0.19__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic.

Files changed (215)
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/asr.py +2 -1
  4. nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libggml-base.dylib +0 -0
  5. nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libmtmd.dylib +0 -0
  6. nexaai/binds/{nexa_llama_cpp/libllama.dylib → cpu_gpu/libnexa_cpu_gpu.dylib} +0 -0
  7. nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libnexa_plugin.dylib +0 -0
  8. nexaai/binds/libnexa_bridge.dylib +0 -0
  9. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  10. nexaai/binds/{nexa_mlx → metal}/libnexa_plugin.dylib +0 -0
  11. nexaai/binds/{nexa_nexaml → nexaml}/libggml-base.dylib +0 -0
  12. nexaai/binds/{nexa_nexaml → nexaml}/libnexa-mm-process.dylib +0 -0
  13. nexaai/binds/{nexa_nexaml → nexaml}/libnexa-sampling.dylib +0 -0
  14. nexaai/binds/nexaml/libnexa_plugin.dylib +0 -0
  15. nexaai/binds/nexaml/libnexaproc.dylib +0 -0
  16. nexaai/binds/{nexa_nexaml → nexaml}/libomp.dylib +0 -0
  17. nexaai/binds/nexaml/libqwen3-vl.dylib +0 -0
  18. nexaai/binds/nexaml/libqwen3vl-vision.dylib +0 -0
  19. nexaai/cv.py +2 -1
  20. nexaai/embedder.py +1 -1
  21. nexaai/image_gen.py +2 -1
  22. nexaai/llm.py +5 -3
  23. nexaai/llm_impl/mlx_llm_impl.py +2 -0
  24. nexaai/llm_impl/pybind_llm_impl.py +2 -0
  25. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +176 -96
  26. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +259 -0
  27. nexaai/mlx_backend/vlm/interface.py +99 -30
  28. nexaai/mlx_backend/vlm/main.py +58 -9
  29. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +338 -299
  30. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  31. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
  32. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
  33. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
  34. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
  35. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
  36. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
  37. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
  38. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1308 -0
  39. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
  40. nexaai/rerank.py +2 -1
  41. nexaai/tts.py +2 -1
  42. nexaai/utils/manifest_utils.py +222 -15
  43. nexaai/utils/model_manager.py +120 -14
  44. nexaai/utils/model_types.py +2 -0
  45. nexaai/vlm.py +2 -1
  46. {nexaai-1.0.18rc1.dist-info → nexaai-1.0.19.dist-info}/METADATA +1 -2
  47. {nexaai-1.0.18rc1.dist-info → nexaai-1.0.19.dist-info}/RECORD +211 -200
  48. nexaai/binds/nexa_nexaml/libnexa_plugin.dylib +0 -0
  49. nexaai/binds/nexa_nexaml/libnexaproc.dylib +0 -0
  50. nexaai/binds/nexa_nexaml/libqwen3-vl.dylib +0 -0
  51. nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib +0 -0
  52. /nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libggml-cpu.so +0 -0
  53. /nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libggml-metal.so +0 -0
  54. /nexaai/binds/{nexa_llama_cpp → cpu_gpu}/libggml.dylib +0 -0
  55. /nexaai/binds/{nexa_mlx → metal}/py-lib/ml.py +0 -0
  56. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/__init__.py +0 -0
  57. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/__init__.py +0 -0
  58. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/__init__.py +0 -0
  59. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +0 -0
  60. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/activation.py +0 -0
  61. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/amp.py +0 -0
  62. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +0 -0
  63. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/conv.py +0 -0
  64. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/bigvgan/resample.py +0 -0
  65. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/__init__.py +0 -0
  66. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/base.py +0 -0
  67. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/dac.py +0 -0
  68. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +0 -0
  69. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/nn/layers.py +0 -0
  70. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +0 -0
  71. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/encodec/__init__.py +0 -0
  72. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/encodec/encodec.py +0 -0
  73. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/__init__.py +0 -0
  74. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/mimi.py +0 -0
  75. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +0 -0
  76. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +0 -0
  77. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +0 -0
  78. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +0 -0
  79. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +0 -0
  80. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +0 -0
  81. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/s3/__init__.py +0 -0
  82. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/s3/model.py +0 -0
  83. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/s3/model_v2.py +0 -0
  84. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/s3/utils.py +0 -0
  85. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/snac/__init__.py +0 -0
  86. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/snac/attention.py +0 -0
  87. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/snac/layers.py +0 -0
  88. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/snac/snac.py +0 -0
  89. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/snac/vq.py +0 -0
  90. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/vocos/__init__.py +0 -0
  91. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/vocos/mel.py +0 -0
  92. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/models/vocos/vocos.py +0 -0
  93. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
  94. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_bigvgan.py +0 -0
  95. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_descript.py +0 -0
  96. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_encodec.py +0 -0
  97. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_mimi.py +0 -0
  98. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_s3.py +0 -0
  99. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_snac.py +0 -0
  100. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/codec/tests/test_vocos.py +0 -0
  101. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/server.py +0 -0
  102. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/sts/__init__.py +0 -0
  103. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +0 -0
  104. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/sts/voice_pipeline.py +0 -0
  105. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/__init__.py +0 -0
  106. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/generate.py +0 -0
  107. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/__init__.py +0 -0
  108. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/__init__.py +0 -0
  109. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/alignment.py +0 -0
  110. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/attention.py +0 -0
  111. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/audio.py +0 -0
  112. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/conformer.py +0 -0
  113. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/ctc.py +0 -0
  114. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +0 -0
  115. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +0 -0
  116. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +0 -0
  117. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +0 -0
  118. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +0 -0
  119. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/__init__.py +0 -0
  120. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/audio.py +0 -0
  121. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/decoding.py +0 -0
  122. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/timing.py +0 -0
  123. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +0 -0
  124. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/whisper.py +0 -0
  125. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/models/whisper/writers.py +0 -0
  126. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/tests/test_models.py +0 -0
  127. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/stt/utils.py +0 -0
  128. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/__init__.py +0 -0
  129. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/audio_player.py +0 -0
  130. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/convert.py +0 -0
  131. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/generate.py +0 -0
  132. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/__init__.py +0 -0
  133. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/bark/__init__.py +0 -0
  134. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/bark/bark.py +0 -0
  135. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/bark/isftnet.py +0 -0
  136. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/bark/pipeline.py +0 -0
  137. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/base.py +0 -0
  138. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/dia/__init__.py +0 -0
  139. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/dia/audio.py +0 -0
  140. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/dia/config.py +0 -0
  141. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/dia/dia.py +0 -0
  142. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/dia/layers.py +0 -0
  143. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/__init__.py +0 -0
  144. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/attention.py +0 -0
  145. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +0 -0
  146. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/conformer.py +0 -0
  147. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
  148. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +0 -0
  149. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +0 -0
  150. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +0 -0
  151. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +0 -0
  152. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/gpt2.py +0 -0
  153. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/indextts.py +0 -0
  154. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/mel.py +0 -0
  155. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/normalize.py +0 -0
  156. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/indextts/perceiver.py +0 -0
  157. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/interpolate.py +0 -0
  158. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/__init__.py +0 -0
  159. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +0 -0
  160. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +0 -0
  161. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/modules.py +0 -0
  162. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +0 -0
  163. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/kokoro/voice.py +0 -0
  164. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/llama/__init__.py +0 -0
  165. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/llama/llama.py +0 -0
  166. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/__init__.py +0 -0
  167. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +0 -0
  168. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +0 -0
  169. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/outetts.py +0 -0
  170. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +0 -0
  171. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/outetts/tokens.py +0 -0
  172. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/sesame/__init__.py +0 -0
  173. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/sesame/attention.py +0 -0
  174. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/sesame/sesame.py +0 -0
  175. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/sesame/watermarking.py +0 -0
  176. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/__init__.py +0 -0
  177. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +0 -0
  178. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/bicodec.py +0 -0
  179. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
  180. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
  181. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +0 -0
  182. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
  183. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +0 -0
  184. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +0 -0
  185. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +0 -0
  186. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +0 -0
  187. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/residual.py +0 -0
  188. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +0 -0
  189. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +0 -0
  190. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +0 -0
  191. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +0 -0
  192. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +0 -0
  193. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +0 -0
  194. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/spark.py +0 -0
  195. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/utils/audio.py +0 -0
  196. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/utils/file.py +0 -0
  197. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +0 -0
  198. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
  199. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/tests/test_base.py +0 -0
  200. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/tests/test_convert.py +0 -0
  201. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/tests/test_interpolate.py +0 -0
  202. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/tests/test_models.py +0 -0
  203. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/tts/utils.py +0 -0
  204. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/utils.py +0 -0
  205. /nexaai/binds/{nexa_mlx → metal}/py-lib/mlx_audio/version.py +0 -0
  206. /nexaai/binds/{nexa_mlx → metal}/py-lib/profiling.py +0 -0
  207. /nexaai/binds/{nexa_nexaml → nexaml}/libfftw3.3.dylib +0 -0
  208. /nexaai/binds/{nexa_nexaml → nexaml}/libfftw3f.3.dylib +0 -0
  209. /nexaai/binds/{nexa_nexaml → nexaml}/libggml-cpu.so +0 -0
  210. /nexaai/binds/{nexa_nexaml → nexaml}/libggml-metal.so +0 -0
  211. /nexaai/binds/{nexa_nexaml → nexaml}/libggml.dylib +0 -0
  212. /nexaai/binds/{nexa_nexaml → nexaml}/libmp3lame.0.dylib +0 -0
  213. /nexaai/binds/{nexa_nexaml → nexaml}/libmpg123.0.dylib +0 -0
  214. {nexaai-1.0.18rc1.dist-info → nexaai-1.0.19.dist-info}/WHEEL +0 -0
  215. {nexaai-1.0.18rc1.dist-info → nexaai-1.0.19.dist-info}/top_level.txt +0 -0
nexaai/_version.py CHANGED
@@ -1,4 +1,4 @@
 # This file is generated by CMake from _version.py.in
 # Do not modify this file manually - it will be overwritten
 
-__version__ = "1.0.18-rc1"
+__version__ = "1.0.19"
nexaai/asr.py CHANGED
@@ -35,7 +35,8 @@ class ASR(BaseModel):
                    tokenizer_path: Optional[str] = None,
                    language: Optional[str] = None,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'ASR':
         """Load ASR model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
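The same `**kwargs` catch-all recurs in the loaders of cv.py, embedder.py, image_gen.py, llm.py and, per the file list, rerank.py, tts.py, and vlm.py (each +2 −1): every `_load_from` now tolerates extra keyword arguments instead of raising TypeError, so a shared caller can pass newer options (such as model_name) without each class needing the parameter. A minimal sketch of the effect, assuming only what the hunk above shows (the path and extra keyword are hypothetical):

    from nexaai.asr import ASR

    # Before 1.0.19 the extra keyword raised:
    #   TypeError: _load_from() got an unexpected keyword argument 'model_name'
    # With the **kwargs catch-all it is accepted and simply ignored by ASR itself.
    asr = ASR._load_from(
        "/path/to/asr-model",        # hypothetical local model path
        model_name="whisper-small",  # hypothetical; swallowed by **kwargs
    )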
nexaai/cv.py CHANGED
@@ -73,7 +73,8 @@ class CVModel(BaseModel):
                    _: str,  # TODO: remove this argument, this is a hack to make api design happy
                    config: CVModelConfig,
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'CVModel':
         """Load CV model from configuration, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/embedder.py CHANGED
@@ -22,7 +22,7 @@ class Embedder(BaseModel):
         pass
 
     @classmethod
-    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP):
+    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP, **kwargs):
         """
         Load an embedder from model files, routing to appropriate implementation.
 
nexaai/image_gen.py CHANGED
@@ -71,7 +71,8 @@ class ImageGen(BaseModel):
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                    device_id: Optional[str] = None,
                    float16: bool = True,
-                   quantize: bool = False
+                   quantize: bool = False,
+                   **kwargs
                    ) -> 'ImageGen':
         """Load image generation model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
nexaai/llm.py CHANGED
@@ -15,10 +15,12 @@ class LLM(BaseModel):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
-                   device_id: Optional[str] = None
+                   device_id: Optional[str] = None,
+                   **kwargs
                    ) -> 'LLM':
         """Load model from local path, routing to appropriate implementation."""
         # Check plugin_id value for routing - handle both enum and string
@@ -26,10 +28,10 @@ class LLM(BaseModel):
 
         if plugin_value == "mlx":
             from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
-            return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return MLXLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
         else:
             from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
-            return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+            return PyBindLLMImpl._load_from(local_path, model_name, tokenizer_path, m_cfg, plugin_id, device_id)
 
     def cancel_generation(self):
         """Signal to cancel any ongoing stream generation."""
nexaai/llm_impl/mlx_llm_impl.py CHANGED
@@ -16,6 +16,7 @@ class MLXLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.MLX,
@@ -40,6 +41,7 @@ class MLXLLMImpl(LLM):
         instance = cls(m_cfg)
         instance._mlx_llm = MLXLLMInterface(
             model_path=local_path,
+            # model_name=model_name,  # FIXME: For MLX LLM, model_name is not used
             tokenizer_path=tokenizer_path or local_path,
             config=mlx_config,
             device=device_id
nexaai/llm_impl/pybind_llm_impl.py CHANGED
@@ -19,6 +19,7 @@ class PyBindLLMImpl(LLM):
     @classmethod
     def _load_from(cls,
                    local_path: str,
+                   model_name: Optional[str] = None,
                    tokenizer_path: Optional[str] = None,
                    m_cfg: ModelConfig = ModelConfig(),
                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
@@ -55,6 +56,7 @@ class PyBindLLMImpl(LLM):
         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
         handle = llm_bind.ml_llm_create(
             model_path=local_path,
+            model_name=model_name,
             tokenizer_path=tokenizer_path,
             model_config=config,
             plugin_id=plugin_id_str,
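The `plugin_id_str` normalization above is the enum-or-string idiom these loaders use wherever a `Union[PluginID, str]` parameter crosses into native code. A standalone illustration with a stand-in enum (the real PluginID lives in nexaai; only the "mlx" value is confirmed by the routing check in llm.py, "llama_cpp" is assumed):

    from enum import Enum
    from typing import Union

    class PluginID(str, Enum):       # stand-in for nexaai's PluginID
        MLX = "mlx"
        LLAMA_CPP = "llama_cpp"      # assumed member value

    def normalize(plugin_id: Union[PluginID, str]) -> str:
        # Same expression as in PyBindLLMImpl._load_from above
        return plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

    assert normalize(PluginID.MLX) == "mlx"   # enum -> plain string
    assert normalize("mlx") == "mlx"          # string passes through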
nexaai/mlx_backend/vlm/generate_qwen3_vl.py CHANGED
@@ -1,6 +1,5 @@
 import argparse
 import json
-import sys
 import os
 import mlx.core as mx
 import mlx.nn as nn
@@ -10,38 +9,21 @@ import requests
 import numpy as np
 from pathlib import Path
 from huggingface_hub import snapshot_download
-
-# Add current directory to path for imports
-curr_dir = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(curr_dir)
-sys.path.append(os.path.dirname(curr_dir))
-
-# Add the qwen3vl model directory to path
-qwen3vl_dir = os.path.join(curr_dir, "modeling", "models", "qwen3_vl")
-sys.path.append(qwen3vl_dir)
+from dataclasses import dataclass
+from typing import Any, Generator, List, Optional, Sequence, Tuple, Union
 
 # Import required modules for quantized loading
 from transformers import AutoTokenizer
 
-# Try relative imports first, fallback to sys.path approach for Nuitka compatibility
-try:
-    from .modeling.models.qwen3_vl.llm_common.generate import nexa_generate_step
-    from .modeling.models.qwen3_vl.llm_common.cache import make_prompt_cache
-    from .modeling.models.qwen3_vl.qwen3vl import (
-        VEGModel, LLMModel, ModelArgs, VisionConfig, TextConfig, handle_multimodal_embeds
-    )
-    from .modeling.models.qwen3_vl.processor import Qwen3VLProcessor
-except ImportError:
-    # Fallback for Nuitka compiled environment - use sys.path approach
-    from llm_common.generate import nexa_generate_step
-    from llm_common.cache import make_prompt_cache
-    from qwen3vl import VEGModel, LLMModel, ModelArgs, VisionConfig, TextConfig, handle_multimodal_embeds
-    from processor import Qwen3VLProcessor
-
-from ml import ChatMessage
-from dataclasses import dataclass
-from typing import Any, Generator, List, Optional, Sequence, Tuple, Union
+# Import from the nested modeling structure
+from .modeling.models.qwen3_vl.llm_common.generate import nexa_generate_step
+from .modeling.models.qwen3_vl.llm_common.cache import make_prompt_cache
+from .modeling.models.qwen3_vl.qwen3vl import (
+    VEGModel, LLMModel, ModelArgs, VisionConfig, TextConfig, handle_multimodal_embeds
+)
+from .modeling.models.qwen3_vl.processor import Qwen3VLProcessor
 from .generate import GenerationResult
+from ml import ChatMessage
 
 # Custom exception for context length exceeded
 class ContextLengthExceededError(Exception):
@@ -61,17 +43,156 @@ def _ensure_list(x: Union[str, List[str], None]) -> Optional[List[str]]:
     return x if isinstance(x, list) else [x]
 
 
+def get_model_configs(model_name: str):
+    """Get model configurations based on model name"""
+
+    # 4B model configs (default)
+    if model_name in ["qwen3vl", "qwen3vl-4b", "qwen3vl-4b-thinking"]:
+        vision_config = VisionConfig(
+            hidden_size=1024,
+            intermediate_size=4096,
+            num_heads=16,
+            num_hidden_layers=24,
+            patch_size=16,
+            temporal_patch_size=2,
+            in_channels=3,
+            hidden_act="gelu",
+            spatial_merge_size=2,
+            out_hidden_size=2560,
+            num_position_embeddings=2304,
+            deepstack_visual_indexes=[5, 11, 17],
+        )
+
+        text_config = TextConfig(
+            model_type="qwen3vl",
+            hidden_size=2560,
+            num_hidden_layers=36,
+            intermediate_size=9728,
+            num_attention_heads=32,
+            num_key_value_heads=8,
+            rms_norm_eps=1e-6,
+            vocab_size=151936,
+            max_position_embeddings=32768,
+            rope_theta=5000000.0,
+            head_dim=128,
+            tie_word_embeddings=True,
+            attention_bias=False,
+            attention_dropout=0.0,
+            rope_scaling={"mrope_section": [24, 20, 20],
+                          "rope_type": "default", "type": "default"},
+        )
+
+    # 8B model configs
+    elif model_name in ["qwen3vl-8b", "qwen3vl-8b-thinking"]:
+        vision_config = VisionConfig(
+            hidden_size=1152,
+            intermediate_size=4304,
+            num_heads=16,
+            num_hidden_layers=27,
+            patch_size=16,
+            temporal_patch_size=2,
+            in_channels=3,
+            hidden_act="gelu",
+            spatial_merge_size=2,
+            out_hidden_size=4096,
+            num_position_embeddings=2304,
+            deepstack_visual_indexes=[8, 16, 24],
+        )
+
+        text_config = TextConfig(
+            model_type="qwen3vl",
+            hidden_size=4096,
+            num_hidden_layers=36,
+            intermediate_size=12288,
+            num_attention_heads=32,
+            num_key_value_heads=8,
+            rms_norm_eps=1e-6,
+            vocab_size=151936,
+            max_position_embeddings=262144,
+            rope_theta=5000000,
+            head_dim=128,
+            tie_word_embeddings=False,
+            attention_bias=False,
+            attention_dropout=0.0,
+            rope_scaling={"mrope_section": [24, 20, 20], "rope_type": "default", "mrope_interleaved": True},
+        )
+    else:
+        # Fallback to 4B config
+        return get_model_configs("qwen3vl-4b")
+
+    return vision_config, text_config
+
+
+def get_weight_filenames(model_name: str, model_path: Path):
+    """Get appropriate weight filenames based on model name and available files"""
+
+    # Determine model size and type based on the actual file structure
+    if "4b" in model_name:
+        size_prefix = "4b"
+    elif "8b" in model_name:
+        size_prefix = "8b"
+    else:
+        size_prefix = "4b"
+
+    # Determine model type
+    if "thinking" in model_name:
+        model_type = f"{size_prefix}_thinking"
+    else:
+        model_type = f"{size_prefix}_instruct"
+
+    # Try different weight file patterns matching the actual file structure
+    llm_patterns = [
+        # New naming convention matching actual files
+        f"qwen3vl-llm-{model_type}-q4_0.safetensors",
+        f"qwen3vl-llm-{model_type}-q8_0.safetensors",
+        f"qwen3vl-llm-{model_type}-f16.safetensors",
+        # Legacy naming convention
+        f"qwen3vl-llm-{size_prefix.upper()}-q4_0.safetensors",
+        f"qwen3vl-llm-{size_prefix.upper()}-q8_0.safetensors",
+        f"qwen3vl-llm-{size_prefix.upper()}-f16.safetensors",
+        f"qwen3vl-llm-{size_prefix.upper()}-f32.safetensors",
+    ]
+
+    vision_patterns = [
+        f"qwen3vl-vision-{model_type}-f16.safetensors",
+        f"qwen3vl-vision-{size_prefix.upper()}-f16.safetensors",
+    ]
+
+    # Find LLM weights
+    llm_weights_path = None
+    quantization_bits = None
+
+    for pattern in llm_patterns:
+        candidate_path = model_path / pattern
+        if candidate_path.exists():
+            llm_weights_path = candidate_path
+            if "q4_0" in pattern:
+                quantization_bits = 4
+            elif "q8_0" in pattern:
+                quantization_bits = 8
+            else:
+                quantization_bits = 16
+            break
+
+    # Find vision weights
+    vision_weights_path = None
+    for pattern in vision_patterns:
+        candidate_path = model_path / pattern
+        if candidate_path.exists():
+            vision_weights_path = candidate_path
+            break
+
+    return llm_weights_path, vision_weights_path, quantization_bits
+
+
+# Update the load_qwen3_vl function signature and implementation:
 def load_qwen3_vl(
     path_or_repo: str,
     adapter_path: Optional[str] = None,
     lazy: bool = False,
     revision: Optional[str] = None,
+    model_name: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Qwen3VLBundledModel, Qwen3VLProcessor]:
-    """Load Qwen3-VL quantized models and processor.
-
-    Parameters are aligned with .generate.load for compatibility.
-    """
+    """Load Qwen3-VL quantized models and processor with support for different model sizes."""
 
     model_path = Path(path_or_repo)
     if not model_path.exists():
@@ -79,75 +200,28 @@ def load_qwen3_vl(
         model_path = Path(snapshot_download(
             repo_id=path_or_repo, repo_type="model", revision=revision))
     else:
-        # Fallback to local modelfiles directory
-        model_path = Path(qwen3vl_dir) / "modelfiles"
+        # Fallback to local modelfiles directory relative to this file
+        curr_dir = Path(__file__).parent
+        model_path = curr_dir / "modeling" / "models" / "qwen3_vl" / "modelfiles"
         if not model_path.exists():
-            model_path = Path(curr_dir) / "modelfiles"
-
-    # Model configs (kept identical to main)
-    vision_config = VisionConfig(
-        hidden_size=1024,
-        intermediate_size=4096,
-        num_heads=16,
-        num_hidden_layers=24,
-        patch_size=16,
-        temporal_patch_size=2,
-        in_channels=3,
-        hidden_act="gelu",
-        spatial_merge_size=2,
-        out_hidden_size=2560,
-        num_position_embeddings=2304,
-        deepstack_visual_indexes=[5, 11, 17],
-    )
+            model_path = curr_dir / "modelfiles"
 
-    text_config = TextConfig(
-        model_type="qwen3vl",
-        hidden_size=2560,
-        num_hidden_layers=36,
-        intermediate_size=9728,
-        num_attention_heads=32,
-        num_key_value_heads=8,
-        rms_norm_eps=1e-6,
-        vocab_size=151936,
-        max_position_embeddings=32768,
-        rope_theta=5000000.0,
-        head_dim=128,
-        tie_word_embeddings=True,
-        attention_bias=False,
-        attention_dropout=0.0,
-        rope_scaling={"mrope_section": [24, 20, 20],
-                      "rope_type": "default", "type": "default"},
-    )
+    # Get model configurations based on model name
+    if model_name:
+        vision_config, text_config = get_model_configs(model_name)
+    else:
+        # Default to 4B config
+        vision_config, text_config = get_model_configs("qwen3vl-4b")
 
     vision_model = VEGModel(vision_config)
     llm_model = LLMModel(text_config)
 
-    # Try to load LLM model from available files in order of preference
-    preferred_order = [
-        ("qwen3vl-llm-4B-q4_0.safetensors", 4),
-        ("qwen3vl-llm-4B-q8_0.safetensors", 8),
-        ("qwen3vl-llm-4B-f32.safetensors", 32)
-    ]
-
-    llm_weights_path = None
-    quantization_bits = None
-
-    # Try loading in order of preference
-    for filename, bits in preferred_order:
-        candidate_path = model_path / filename
-        if candidate_path.exists():
-            llm_weights_path = candidate_path
-            quantization_bits = bits
-            break
-
-    if llm_weights_path is None:
-        # Fallback to original hardcoded path for backward compatibility
-        llm_weights_path = model_path / "qwen3vl-llm-4B-q4_0.safetensors"
-        quantization_bits = 4
-
-    vision_weights_path = model_path / "qwen3vl-vision-4B-f16.safetensors"
+    # Get appropriate weight filenames
+    llm_weights_path, vision_weights_path, quantization_bits = get_weight_filenames(
+        model_name or "qwen3vl-4b", model_path
+    )
 
-    if not vision_weights_path.exists() or not llm_weights_path.exists():
+    if not vision_weights_path or not llm_weights_path:
         raise FileNotFoundError(
             f"Missing safetensors. Vision: {vision_weights_path}, LLM: {llm_weights_path}"
         )
@@ -163,8 +237,14 @@
 
     llm_model.load_weights(str(llm_weights_path), strict=True)
 
-    # Tokenizer and processor
-    tokenizer = AutoTokenizer.from_pretrained(path_or_repo)
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(str(model_path))
+    except Exception:
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(path_or_repo)
+        except Exception:
+            raise Exception("Failed to load tokenizer from the same path where model weights are loaded and original path_or_repo.")
+
     processor = Qwen3VLProcessor(tokenizer=tokenizer)
 
     return Qwen3VLBundledModel(vision_model=vision_model, llm_model=llm_model), processor