nexaai-1.0.16rc13-cp310-cp310-macosx_15_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic.

Files changed (557)
  1. nexaai/__init__.py +83 -0
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +4 -0
  4. nexaai/asr.py +64 -0
  5. nexaai/asr_impl/__init__.py +0 -0
  6. nexaai/asr_impl/mlx_asr_impl.py +92 -0
  7. nexaai/asr_impl/pybind_asr_impl.py +44 -0
  8. nexaai/base.py +39 -0
  9. nexaai/binds/__init__.py +4 -0
  10. nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
  11. nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
  12. nexaai/binds/libnexa_bridge.dylib +0 -0
  13. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  14. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  15. nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
  16. nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
  17. nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
  18. nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
  19. nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
  20. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  21. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  22. nexaai/binds/nexa_mlx/py-lib/ml.py +888 -0
  23. nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py +0 -0
  24. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/__init__.py +1 -0
  25. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/__init__.py +5 -0
  26. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
  27. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
  28. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
  29. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
  30. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
  31. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
  32. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
  33. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
  34. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
  35. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
  36. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
  37. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
  38. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
  39. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
  40. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
  41. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
  42. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
  43. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
  44. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
  45. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
  46. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
  47. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
  48. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
  49. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
  50. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
  51. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
  52. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
  53. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
  54. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
  55. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
  56. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
  57. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
  58. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
  59. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
  60. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
  61. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
  62. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
  63. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
  64. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
  65. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
  66. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
  67. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
  68. nexaai/binds/nexa_mlx/py-lib/mlx_audio/server.py +525 -0
  69. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/__init__.py +0 -0
  70. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
  71. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
  72. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/__init__.py +0 -0
  73. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/generate.py +174 -0
  74. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/__init__.py +0 -0
  75. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
  76. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
  77. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
  78. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
  79. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
  80. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
  81. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
  82. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
  83. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
  84. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
  85. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
  86. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
  87. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
  88. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
  89. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
  90. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
  91. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
  92. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
  93. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
  94. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/utils.py +195 -0
  95. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/__init__.py +1 -0
  96. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/audio_player.py +120 -0
  97. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/convert.py +71 -0
  98. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/generate.py +449 -0
  99. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/__init__.py +0 -0
  100. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
  101. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
  102. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
  103. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
  104. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/base.py +84 -0
  105. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
  106. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
  107. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
  108. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
  109. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
  110. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
  111. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
  112. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
  113. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
  114. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
  115. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
  116. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
  117. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
  118. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
  119. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
  120. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
  121. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
  122. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
  123. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
  124. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
  125. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
  126. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
  127. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
  128. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
  129. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
  130. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
  131. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
  132. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
  133. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
  134. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
  135. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
  136. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
  137. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
  138. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
  139. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
  140. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
  141. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
  142. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
  143. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
  144. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
  145. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
  146. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
  147. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
  148. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
  149. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
  150. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
  151. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
  152. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
  153. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
  154. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
  155. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
  156. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
  157. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
  158. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
  159. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
  160. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
  161. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
  162. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
  163. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
  164. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
  165. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
  166. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
  167. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
  168. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
  169. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
  170. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/utils.py +337 -0
  171. nexaai/binds/nexa_mlx/py-lib/mlx_audio/utils.py +237 -0
  172. nexaai/binds/nexa_mlx/py-lib/mlx_audio/version.py +1 -0
  173. nexaai/binds/nexa_mlx/py-lib/profiling.py +239 -0
  174. nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
  175. nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
  176. nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
  177. nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
  178. nexaai/binds/nexa_nexaml/libnexa-mm-process.dylib +0 -0
  179. nexaai/binds/nexa_nexaml/libnexa-sampling.dylib +0 -0
  180. nexaai/binds/nexa_nexaml/libnexa_plugin.dylib +0 -0
  181. nexaai/binds/nexa_nexaml/libnexaproc.dylib +0 -0
  182. nexaai/binds/nexa_nexaml/libqwen3-vl.dylib +0 -0
  183. nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib +0 -0
  184. nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
  185. nexaai/common.py +104 -0
  186. nexaai/cv.py +92 -0
  187. nexaai/cv_impl/__init__.py +0 -0
  188. nexaai/cv_impl/mlx_cv_impl.py +89 -0
  189. nexaai/cv_impl/pybind_cv_impl.py +32 -0
  190. nexaai/embedder.py +72 -0
  191. nexaai/embedder_impl/__init__.py +0 -0
  192. nexaai/embedder_impl/mlx_embedder_impl.py +116 -0
  193. nexaai/embedder_impl/pybind_embedder_impl.py +95 -0
  194. nexaai/image_gen.py +140 -0
  195. nexaai/image_gen_impl/__init__.py +0 -0
  196. nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
  197. nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
  198. nexaai/llm.py +96 -0
  199. nexaai/llm_impl/__init__.py +0 -0
  200. nexaai/llm_impl/mlx_llm_impl.py +269 -0
  201. nexaai/llm_impl/pybind_llm_impl.py +218 -0
  202. nexaai/log.py +92 -0
  203. nexaai/mlx_backend/asr/__init__.py +12 -0
  204. nexaai/mlx_backend/asr/interface.py +122 -0
  205. nexaai/mlx_backend/common/__init__.py +0 -0
  206. nexaai/mlx_backend/common/utils.py +25 -0
  207. nexaai/mlx_backend/cv/__init__.py +0 -0
  208. nexaai/mlx_backend/cv/generate.py +195 -0
  209. nexaai/mlx_backend/cv/interface.py +151 -0
  210. nexaai/mlx_backend/cv/main.py +81 -0
  211. nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
  212. nexaai/mlx_backend/embedding/__init__.py +0 -0
  213. nexaai/mlx_backend/embedding/generate.py +333 -0
  214. nexaai/mlx_backend/embedding/interface.py +617 -0
  215. nexaai/mlx_backend/embedding/main.py +173 -0
  216. nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
  217. nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
  218. nexaai/mlx_backend/image_gen/__init__.py +1 -0
  219. nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
  220. nexaai/mlx_backend/image_gen/interface.py +82 -0
  221. nexaai/mlx_backend/image_gen/main.py +281 -0
  222. nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
  223. nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
  224. nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
  225. nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
  226. nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
  227. nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
  228. nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
  229. nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
  230. nexaai/mlx_backend/llm/__init__.py +0 -0
  231. nexaai/mlx_backend/llm/generate.py +149 -0
  232. nexaai/mlx_backend/llm/interface.py +764 -0
  233. nexaai/mlx_backend/llm/main.py +68 -0
  234. nexaai/mlx_backend/ml.py +888 -0
  235. nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
  236. nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
  237. nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
  238. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
  239. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
  240. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
  241. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
  242. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
  243. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
  244. nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
  245. nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
  246. nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
  247. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
  248. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
  249. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
  250. nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
  251. nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
  252. nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
  253. nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
  254. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
  255. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
  256. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
  257. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
  258. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
  259. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
  260. nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
  261. nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
  262. nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
  263. nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
  264. nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
  265. nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
  266. nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
  267. nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
  268. nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
  269. nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
  270. nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
  271. nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
  272. nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
  273. nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
  274. nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
  275. nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
  276. nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
  277. nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
  278. nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
  279. nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
  280. nexaai/mlx_backend/mlx_audio/server.py +525 -0
  281. nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
  282. nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
  283. nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
  284. nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
  285. nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
  286. nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
  287. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
  288. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
  289. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
  290. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
  291. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
  292. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
  293. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
  294. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
  295. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
  296. nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
  297. nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
  298. nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
  299. nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
  300. nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
  301. nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
  302. nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
  303. nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
  304. nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
  305. nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
  306. nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
  307. nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
  308. nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
  309. nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
  310. nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
  311. nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
  312. nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
  313. nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
  314. nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
  315. nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
  316. nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
  317. nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
  318. nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
  319. nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
  320. nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
  321. nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
  322. nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
  323. nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
  324. nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
  325. nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
  326. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
  327. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
  328. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
  329. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
  330. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
  331. nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
  332. nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
  333. nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
  334. nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
  335. nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
  336. nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
  337. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
  338. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
  339. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
  340. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
  341. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
  342. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
  343. nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
  344. nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
  345. nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
  346. nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
  347. nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
  348. nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
  349. nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
  350. nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
  351. nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
  352. nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
  353. nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
  354. nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
  355. nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
  356. nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
  357. nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
  358. nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
  359. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
  360. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
  361. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
  362. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
  363. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
  364. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
  365. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
  366. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
  367. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
  368. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
  369. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
  370. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
  371. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
  372. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
  373. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
  374. nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
  375. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
  376. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
  377. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
  378. nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
  379. nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
  380. nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
  381. nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
  382. nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
  383. nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
  384. nexaai/mlx_backend/mlx_audio/utils.py +237 -0
  385. nexaai/mlx_backend/mlx_audio/version.py +1 -0
  386. nexaai/mlx_backend/profiling.py +239 -0
  387. nexaai/mlx_backend/rerank/__init__.py +0 -0
  388. nexaai/mlx_backend/rerank/generate.py +174 -0
  389. nexaai/mlx_backend/rerank/interface.py +287 -0
  390. nexaai/mlx_backend/rerank/main.py +127 -0
  391. nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
  392. nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
  393. nexaai/mlx_backend/sd/__init__.py +1 -0
  394. nexaai/mlx_backend/sd/interface.py +362 -0
  395. nexaai/mlx_backend/sd/main.py +286 -0
  396. nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
  397. nexaai/mlx_backend/sd/modeling/clip.py +116 -0
  398. nexaai/mlx_backend/sd/modeling/config.py +65 -0
  399. nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
  400. nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
  401. nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
  402. nexaai/mlx_backend/sd/modeling/unet.py +460 -0
  403. nexaai/mlx_backend/sd/modeling/vae.py +274 -0
  404. nexaai/mlx_backend/tts/__init__.py +12 -0
  405. nexaai/mlx_backend/tts/interface.py +276 -0
  406. nexaai/mlx_backend/vlm/__init__.py +3 -0
  407. nexaai/mlx_backend/vlm/generate.py +572 -0
  408. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +261 -0
  409. nexaai/mlx_backend/vlm/interface.py +415 -0
  410. nexaai/mlx_backend/vlm/main.py +316 -0
  411. nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
  412. nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
  413. nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
  414. nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
  415. nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
  416. nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
  417. nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
  418. nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
  419. nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
  420. nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
  421. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
  422. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
  423. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
  424. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
  425. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
  426. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
  427. nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
  428. nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
  429. nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
  430. nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
  431. nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
  432. nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
  433. nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
  434. nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
  435. nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
  436. nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
  437. nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
  438. nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
  439. nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
  440. nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
  441. nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
  442. nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
  443. nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
  444. nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
  445. nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
  446. nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
  447. nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
  448. nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
  449. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
  450. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
  451. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
  452. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
  453. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
  454. nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
  455. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
  456. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
  457. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
  458. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
  459. nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
  460. nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
  461. nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
  462. nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
  463. nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
  464. nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
  465. nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
  466. nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
  467. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
  468. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
  469. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
  470. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
  471. nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
  472. nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
  473. nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
  474. nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
  475. nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
  476. nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
  477. nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
  478. nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
  479. nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
  480. nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
  481. nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
  482. nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
  483. nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
  484. nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
  485. nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
  486. nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
  487. nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
  488. nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
  489. nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
  490. nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
  491. nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
  492. nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
  493. nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
  494. nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
  495. nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
  496. nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
  497. nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
  498. nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
  499. nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
  500. nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
  501. nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
  502. nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
  503. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
  504. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
  505. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
  506. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
  507. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
  508. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
  509. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
  510. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
  511. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
  512. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
  513. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  514. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
  515. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
  516. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
  517. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
  518. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
  519. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
  520. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
  521. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
  522. nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
  523. nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
  524. nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
  525. nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
  526. nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
  527. nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
  528. nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
  529. nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
  530. nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
  531. nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
  532. nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
  533. nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
  534. nexaai/rerank.py +55 -0
  535. nexaai/rerank_impl/__init__.py +0 -0
  536. nexaai/rerank_impl/mlx_rerank_impl.py +92 -0
  537. nexaai/rerank_impl/pybind_rerank_impl.py +43 -0
  538. nexaai/runtime.py +68 -0
  539. nexaai/tts.py +74 -0
  540. nexaai/tts_impl/__init__.py +0 -0
  541. nexaai/tts_impl/mlx_tts_impl.py +94 -0
  542. nexaai/tts_impl/pybind_tts_impl.py +43 -0
  543. nexaai/utils/avatar_fetcher.py +104 -0
  544. nexaai/utils/decode.py +18 -0
  545. nexaai/utils/manifest_utils.py +324 -0
  546. nexaai/utils/model_manager.py +1353 -0
  547. nexaai/utils/model_types.py +47 -0
  548. nexaai/utils/progress_tracker.py +385 -0
  549. nexaai/utils/quantization_utils.py +245 -0
  550. nexaai/vlm.py +128 -0
  551. nexaai/vlm_impl/__init__.py +0 -0
  552. nexaai/vlm_impl/mlx_vlm_impl.py +258 -0
  553. nexaai/vlm_impl/pybind_vlm_impl.py +230 -0
  554. nexaai-1.0.16rc13.dist-info/METADATA +32 -0
  555. nexaai-1.0.16rc13.dist-info/RECORD +557 -0
  556. nexaai-1.0.16rc13.dist-info/WHEEL +5 -0
  557. nexaai-1.0.16rc13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,85 @@
+ from typing import List, Optional, Union
+
+ from nexaai.common import PluginID
+ from nexaai.image_gen import ImageGen, ImageGenerationConfig, ImageSamplerConfig, SchedulerConfig, Image
+
+
+ class PyBindImageGenImpl(ImageGen):
+     def __init__(self):
+         """Initialize PyBind Image Generation implementation."""
+         super().__init__()
+         # TODO: Add PyBind-specific initialization
+
+     @classmethod
+     def _load_from(cls,
+                    model_path: str,
+                    scheduler_config_path: str = "",
+                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
+                    device_id: Optional[str] = None,
+                    float16: bool = True,
+                    quantize: bool = False
+                    ) -> 'PyBindImageGenImpl':
+         """Load image generation model from local path using PyBind backend."""
+         # TODO: Implement PyBind image generation loading
+         instance = cls()
+         return instance
+
+     def eject(self):
+         """Destroy the model and free resources."""
+         # TODO: Implement PyBind image generation cleanup
+         pass
+
+     def load_model(self, model_path: str, extra_data: Optional[str] = None) -> bool:
+         """Load model from path."""
+         # TODO: Implement PyBind image generation model loading
+         raise NotImplementedError("PyBind image generation model loading not yet implemented")
+
+     def set_scheduler(self, config: SchedulerConfig) -> None:
+         """Set scheduler configuration."""
+         # TODO: Implement PyBind scheduler setting
+         raise NotImplementedError("PyBind scheduler setting not yet implemented")
+
+     def set_sampler(self, config: ImageSamplerConfig) -> None:
+         """Set sampler configuration."""
+         # TODO: Implement PyBind sampler setting
+         raise NotImplementedError("PyBind sampler setting not yet implemented")
+
+     def reset_sampler(self) -> None:
+         """Reset sampler to default configuration."""
+         # TODO: Implement PyBind sampler reset
+         raise NotImplementedError("PyBind sampler reset not yet implemented")
+
+     def txt2img(self, prompt: str, config: ImageGenerationConfig) -> Image:
+         """Generate image from text prompt."""
+         # TODO: Implement PyBind text-to-image
+         raise NotImplementedError("PyBind text-to-image not yet implemented")
+
+     def img2img(self, init_image: Image, prompt: str, config: ImageGenerationConfig) -> Image:
+         """Generate image from initial image and text prompt."""
+         # TODO: Implement PyBind image-to-image
+         raise NotImplementedError("PyBind image-to-image not yet implemented")
+
+     def generate(self, config: ImageGenerationConfig) -> Image:
+         """Generate image from configuration."""
+         # TODO: Implement PyBind image generation
+         raise NotImplementedError("PyBind image generation not yet implemented")
+
+     def set_lora(self, lora_id: int) -> None:
+         """Set active LoRA adapter."""
+         # TODO: Implement PyBind LoRA setting
+         raise NotImplementedError("PyBind LoRA setting not yet implemented")
+
+     def add_lora(self, lora_path: str) -> int:
+         """Add LoRA adapter and return its ID."""
+         # TODO: Implement PyBind LoRA addition
+         raise NotImplementedError("PyBind LoRA addition not yet implemented")
+
+     def remove_lora(self, lora_id: int) -> None:
+         """Remove LoRA adapter."""
+         # TODO: Implement PyBind LoRA removal
+         raise NotImplementedError("PyBind LoRA removal not yet implemented")
+
+     def list_loras(self) -> List[int]:
+         """List available LoRA adapters."""
+         # TODO: Implement PyBind LoRA listing
+         raise NotImplementedError("PyBind LoRA listing not yet implemented")
nexaai/llm.py ADDED
@@ -0,0 +1,96 @@
+ from typing import Generator, Optional, Union
+ from abc import abstractmethod
+ import queue
+ import threading
+
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
+ from nexaai.base import BaseModel, ProfilingData
+
+ class LLM(BaseModel):
+     def __init__(self, m_cfg: ModelConfig = ModelConfig()):
+         """Initialize base LLM class."""
+         self._m_cfg = m_cfg
+         self._cancel_event = threading.Event()  # New attribute to control cancellation
+
+     @classmethod
+     def _load_from(cls,
+                    local_path: str,
+                    tokenizer_path: Optional[str] = None,
+                    m_cfg: ModelConfig = ModelConfig(),
+                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
+                    device_id: Optional[str] = None
+                    ) -> 'LLM':
+         """Load model from local path, routing to appropriate implementation."""
+         # Check plugin_id value for routing - handle both enum and string
+         plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+
+         if plugin_value == "mlx":
+             from nexaai.llm_impl.mlx_llm_impl import MLXLLMImpl
+             return MLXLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+         else:
+             from nexaai.llm_impl.pybind_llm_impl import PyBindLLMImpl
+             return PyBindLLMImpl._load_from(local_path, tokenizer_path, m_cfg, plugin_id, device_id)
+
+     def cancel_generation(self):
+         """Signal to cancel any ongoing stream generation."""
+         self._cancel_event.set()
+
+     def reset_cancel(self):
+         """Reset the cancel event. Call before starting a new generation if needed."""
+         self._cancel_event.clear()
+
+     @abstractmethod
+     def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
+         """Apply the chat template to messages."""
+         pass
+
+     @abstractmethod
+     def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
+         """Generate text with streaming."""
+         pass
+
+     @abstractmethod
+     def generate(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> str:
+         """
+         Generate text without streaming.
+
+         Args:
+             prompt (str): The prompt to generate text from. For chat models, this is the chat messages after chat template is applied.
+             g_cfg (GenerationConfig): Generation configuration.
+
+         Returns:
+             str: The generated text.
+         """
+         pass
+
+     def get_profiling_data(self) -> Optional[ProfilingData]:
+         """Get profiling data from the last generation."""
+         pass
+
+     @abstractmethod
+     def save_kv_cache(self, path: str):
+         """
+         Save the key-value cache to the file.
+
+         Args:
+             path (str): The path to the file.
+         """
+         pass
+
+     @abstractmethod
+     def load_kv_cache(self, path: str):
+         """
+         Load the key-value cache from the file.
+
+         Args:
+             path (str): The path to the file.
+         """
+         pass
+
+     @abstractmethod
+     def reset(self):
+         """
+         Reset the LLM model context and KV cache. If not reset, the model will skip the number of evaluated tokens and treat tokens after those as the new incremental tokens.
+         If your past chat history changed, or you are starting a new chat, you should always reset the model before running generate.
+         """
+         pass
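
The LLM base class above routes _load_from by plugin id ("mlx" selects MLXLLMImpl, anything else PyBindLLMImpl) and exposes a cancellable streaming generator. For orientation, a minimal consumer might look like the following sketch; the model path and message are hypothetical, and only methods shown in this diff are used.

    from nexaai.common import ModelConfig, GenerationConfig, PluginID
    from nexaai.llm import LLM

    # Hypothetical local checkpoint path; routing follows the plugin_id check above.
    llm = LLM._load_from(local_path="./models/example-llm",
                         m_cfg=ModelConfig(),
                         plugin_id=PluginID.MLX)

    # Both implementations accept {"role", "content"} message dicts.
    prompt = llm.apply_chat_template([{"role": "user", "content": "Hello!"}])

    try:
        for token in llm.generate_stream(prompt, GenerationConfig()):
            print(token, end="", flush=True)
    finally:
        llm.reset()  # clear context/KV cache before an unrelated prompt
        llm.eject()  # release the model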
nexaai/llm_impl/__init__.py ADDED
File without changes
nexaai/llm_impl/mlx_llm_impl.py ADDED
@@ -0,0 +1,269 @@
+ from typing import Generator, Optional, Any, Sequence, Union
+
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
+ from nexaai.llm import LLM
+ from nexaai.mlx_backend.llm.interface import LLM as MLXLLMInterface
+ from nexaai.mlx_backend.ml import ModelConfig as MLXModelConfig, SamplerConfig as MLXSamplerConfig, GenerationConfig as MLXGenerationConfig, EmbeddingConfig
+
+
+ class MLXLLMImpl(LLM):
+     def __init__(self, m_cfg: ModelConfig = ModelConfig()):
+         """Initialize MLX LLM implementation."""
+         super().__init__(m_cfg)
+         self._mlx_llm = None
+
+     @classmethod
+     def _load_from(cls,
+                    local_path: str,
+                    tokenizer_path: Optional[str] = None,
+                    m_cfg: ModelConfig = ModelConfig(),
+                    plugin_id: Union[PluginID, str] = PluginID.MLX,
+                    device_id: Optional[str] = None
+                    ) -> 'MLXLLMImpl':
+         """Load model from local path using MLX backend."""
+         try:
+             # MLX interface and configs are already imported
+
+             # Convert our ModelConfig to MLX ModelConfig
+             mlx_config = MLXModelConfig()
+             mlx_config.n_ctx = m_cfg.n_ctx
+             mlx_config.n_threads = m_cfg.n_threads
+             mlx_config.n_threads_batch = m_cfg.n_threads_batch
+             mlx_config.n_batch = m_cfg.n_batch
+             mlx_config.n_ubatch = m_cfg.n_ubatch
+             mlx_config.n_seq_max = m_cfg.n_seq_max
+             mlx_config.chat_template_path = m_cfg.chat_template_path
+             mlx_config.chat_template_content = m_cfg.chat_template_content
+
+             # Create instance and load MLX model
+             instance = cls(m_cfg)
+             instance._mlx_llm = MLXLLMInterface(
+                 model_path=local_path,
+                 tokenizer_path=tokenizer_path or local_path,
+                 config=mlx_config,
+                 device=device_id
+             )
+
+             return instance
+         except Exception as e:
+             raise RuntimeError(f"Failed to load MLX LLM: {str(e)}")
+
+     def eject(self):
+         """Release the model from memory."""
+         if self._mlx_llm:
+             self._mlx_llm.destroy()
+             self._mlx_llm = None
+
+     def apply_chat_template(
+         self,
+         messages: Sequence[ChatMessage],
+         tools: Optional[str] = None,
+         enable_thinking: bool = True,
+         add_generation_prompt: bool = True
+     ) -> str:
+         """Apply the chat template to messages."""
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             # Convert to MLX ChatMessage format
+             mlx_messages = []
+             for msg in messages:
+                 # Create a simple object with role and content attributes
+                 class MLXChatMessage:
+                     def __init__(self, role, content):
+                         self.role = role
+                         self.content = content
+
+                 # Handle both dict-style and attribute-style access
+                 if hasattr(msg, 'role') and hasattr(msg, 'content'):
+                     # Message is already an object with attributes
+                     mlx_messages.append(MLXChatMessage(msg.role, msg.content))
+                 else:
+                     # Message is a dict
+                     mlx_messages.append(MLXChatMessage(msg["role"], msg["content"]))
+
+             return self._mlx_llm.apply_chat_template(mlx_messages, tools=tools, enable_thinking=enable_thinking, add_generation_prompt=add_generation_prompt)
+         except Exception as e:
+             raise RuntimeError(f"Failed to apply chat template: {str(e)}")
+
+     def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
+         """Generate text with streaming."""
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             import queue
+             import threading
+
+             # Convert GenerationConfig to MLX format
+
+             mlx_gen_config = MLXGenerationConfig()
+             mlx_gen_config.max_tokens = g_cfg.max_tokens
+             mlx_gen_config.stop = g_cfg.stop_words
+             mlx_gen_config.image_paths = g_cfg.image_paths
+             mlx_gen_config.audio_paths = g_cfg.audio_paths
+
+             if g_cfg.sampler_config:
+                 mlx_sampler_config = MLXSamplerConfig()
+                 mlx_sampler_config.temperature = g_cfg.sampler_config.temperature
+                 mlx_sampler_config.top_p = g_cfg.sampler_config.top_p
+                 mlx_sampler_config.top_k = g_cfg.sampler_config.top_k
+                 mlx_sampler_config.repetition_penalty = g_cfg.sampler_config.repetition_penalty
+                 mlx_sampler_config.presence_penalty = g_cfg.sampler_config.presence_penalty
+                 mlx_sampler_config.frequency_penalty = g_cfg.sampler_config.frequency_penalty
+                 mlx_sampler_config.seed = g_cfg.sampler_config.seed
+                 mlx_sampler_config.grammar_path = g_cfg.sampler_config.grammar_path
+                 mlx_sampler_config.grammar_string = g_cfg.sampler_config.grammar_string
+                 mlx_gen_config.sampler_config = mlx_sampler_config
+
+             # Create a queue for streaming tokens
+             token_queue = queue.Queue()
+             exception_container = [None]
+             self.reset_cancel()  # Reset cancel flag before generation
+
+             def token_callback(token: str, user_data: Any = None) -> bool:
+                 if self._cancel_event.is_set():
+                     token_queue.put(('end', None))
+                     return False
+                 try:
+                     token_queue.put(('token', token))
+                     return True
+                 except Exception as e:
+                     exception_container[0] = e
+                     return False
+
+             # Run generation in a separate thread
+             def generate():
+                 try:
+                     self._mlx_llm.generate_stream(prompt, mlx_gen_config, token_callback)
+                 except Exception as e:
+                     exception_container[0] = e
+                 finally:
+                     token_queue.put(('end', None))
+
+             thread = threading.Thread(target=generate)
+             thread.start()
+
+             # Yield tokens as they come from the queue
+             while True:
+                 if exception_container[0]:
+                     raise exception_container[0]
+
+                 try:
+                     msg_type, token = token_queue.get(timeout=0.1)
+                     if msg_type == 'end':
+                         break
+                     elif msg_type == 'token':
+                         yield token
+                 except queue.Empty:
+                     if not thread.is_alive():
+                         break
+                     continue
+
+             thread.join()
+
+             if exception_container[0]:
+                 raise exception_container[0]
+
+         except Exception as e:
+             raise RuntimeError(f"Failed to generate streaming text: {str(e)}")
+
+     def generate(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> str:
+         """
+         Generate text without streaming.
+
+         Args:
+             prompt (str): The prompt to generate text from.
+             g_cfg (GenerationConfig): Generation configuration.
+
+         Returns:
+             str: The generated text.
+         """
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             # Convert GenerationConfig to MLX format
+
+             mlx_gen_config = MLXGenerationConfig()
+             mlx_gen_config.max_tokens = g_cfg.max_tokens
+             mlx_gen_config.stop = g_cfg.stop_words
+             mlx_gen_config.image_paths = g_cfg.image_paths
+             mlx_gen_config.audio_paths = g_cfg.audio_paths
+
+             if g_cfg.sampler_config:
+                 mlx_sampler_config = MLXSamplerConfig()
+                 mlx_sampler_config.temperature = g_cfg.sampler_config.temperature
+                 mlx_sampler_config.top_p = g_cfg.sampler_config.top_p
+                 mlx_sampler_config.top_k = g_cfg.sampler_config.top_k
+                 mlx_sampler_config.repetition_penalty = g_cfg.sampler_config.repetition_penalty
+                 mlx_sampler_config.presence_penalty = g_cfg.sampler_config.presence_penalty
+                 mlx_sampler_config.frequency_penalty = g_cfg.sampler_config.frequency_penalty
+                 mlx_sampler_config.seed = g_cfg.sampler_config.seed
+                 mlx_sampler_config.grammar_path = g_cfg.sampler_config.grammar_path
+                 mlx_sampler_config.grammar_string = g_cfg.sampler_config.grammar_string
+                 mlx_gen_config.sampler_config = mlx_sampler_config
+
+             # Simple token callback that just continues
+             def token_callback(token: str, user_data: Any = None) -> bool:
+                 return not self._cancel_event.is_set()
+
+             # Use MLX streaming generation and return the full result
+             return self._mlx_llm.generate_stream(prompt, mlx_gen_config, token_callback)
+
+         except Exception as e:
+             raise RuntimeError(f"Failed to generate text: {str(e)}")
+
+     def get_profiling_data(self) -> Optional[ProfilingData]:
+         """Get profiling data from the last generation."""
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+         return self._mlx_llm.get_profiling_data()
+
+     def save_kv_cache(self, path: str):
+         """
+         Save the key-value cache to the file.
+
+         Args:
+             path (str): The path to the file.
+         """
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             success = self._mlx_llm.save_kv_cache(path)
+             if not success:
+                 raise RuntimeError("Failed to save KV cache")
+         except Exception as e:
+             raise RuntimeError(f"Failed to save KV cache: {str(e)}")
+
+     def load_kv_cache(self, path: str):
+         """
+         Load the key-value cache from the file.
+
+         Args:
+             path (str): The path to the file.
+         """
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             success = self._mlx_llm.load_kv_cache(path)
+             if not success:
+                 raise RuntimeError("Failed to load KV cache")
+         except Exception as e:
+             raise RuntimeError(f"Failed to load KV cache: {str(e)}")
+
+     def reset(self):
+         """
+         Reset the LLM model context and KV cache.
+         """
+         if not self._mlx_llm:
+             raise RuntimeError("MLX LLM not loaded")
+
+         try:
+             self._mlx_llm.reset()
+         except Exception as e:
+             raise RuntimeError(f"Failed to reset MLX LLM: {str(e)}")
nexaai/llm_impl/pybind_llm_impl.py ADDED
@@ -0,0 +1,218 @@
+ from typing import Any, Generator, Optional, Union
+ import queue
+ import threading
+ 
+ from nexaai.base import ProfilingData
+ from nexaai.common import ModelConfig, GenerationConfig, ChatMessage, PluginID
+ from nexaai.binds import llm_bind, common_bind
+ from nexaai.runtime import _ensure_runtime
+ from nexaai.llm import LLM
+ 
+ 
+ class PyBindLLMImpl(LLM):
+     def __init__(self, handle: Any, m_cfg: ModelConfig = ModelConfig()):
+         """Private constructor, should not be called directly."""
+         super().__init__(m_cfg)
+         self._handle = handle  # This is a py::capsule
+         self._profiling_data = None
+ 
+     @classmethod
+     def _load_from(cls,
+                    local_path: str,
+                    tokenizer_path: Optional[str] = None,
+                    m_cfg: ModelConfig = ModelConfig(),
+                    plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
+                    device_id: Optional[str] = None
+                    ) -> 'PyBindLLMImpl':
+         """Load model from local path."""
+         _ensure_runtime()
+ 
+         config = common_bind.ModelConfig()
+ 
+         config.n_ctx = m_cfg.n_ctx
+         if m_cfg.n_threads is not None:
+             config.n_threads = m_cfg.n_threads
+         if m_cfg.n_threads_batch is not None:
+             config.n_threads_batch = m_cfg.n_threads_batch
+         if m_cfg.n_batch is not None:
+             config.n_batch = m_cfg.n_batch
+         if m_cfg.n_ubatch is not None:
+             config.n_ubatch = m_cfg.n_ubatch
+         if m_cfg.n_seq_max is not None:
+             config.n_seq_max = m_cfg.n_seq_max
+         if m_cfg.n_gpu_layers is not None:
+             config.n_gpu_layers = m_cfg.n_gpu_layers
+ 
+         # Handle chat template strings
+         if m_cfg.chat_template_path:
+             config.chat_template_path = m_cfg.chat_template_path
+ 
+         if m_cfg.chat_template_content:
+             config.chat_template_content = m_cfg.chat_template_content
+ 
+         # Create handle: returns py::capsule with automatic cleanup
+         # Convert enum to string for C++ binding
+         plugin_id_str = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id
+         handle = llm_bind.ml_llm_create(
+             model_path=local_path,
+             tokenizer_path=tokenizer_path,
+             model_config=config,
+             plugin_id=plugin_id_str,
+             device_id=device_id
+         )
+         return cls(handle, m_cfg)
+ 
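A sketch of how this loader might be called. The model path is a placeholder, the llama.cpp plugin is the default shown above, and ModelConfig is assumed to accept its fields as constructor keywords (otherwise set them as attributes on a default instance).

from nexaai.common import ModelConfig, PluginID

impl = PyBindLLMImpl._load_from(
    local_path="/models/example-model-q4_k_m.gguf",  # placeholder path to a local model file
    m_cfg=ModelConfig(n_ctx=4096, n_gpu_layers=99),  # fields mirror those mapped above
    plugin_id=PluginID.LLAMA_CPP,
)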
+     def eject(self):
+         """Release the model from memory."""
+         # py::capsule handles cleanup automatically
+         del self._handle
+         self._handle = None
+ 
+     def apply_chat_template(self, messages: list[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True, add_generation_prompt: bool = True) -> str:
+         """Apply the chat template to messages."""
+         # Convert TypedDict to list of dicts for binding
+         message_dicts = [
+             {"role": m["role"], "content": m["content"]}
+             for m in messages
+         ]
+         return llm_bind.ml_llm_apply_chat_template(self._handle, message_dicts)
+ 
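Given the role/content conversion above, ChatMessage entries behave like plain dicts with "role" and "content" keys. A sketch, reusing the `impl` from the loading example; note that `tools`, `enable_thinking`, and `add_generation_prompt` are accepted here but not forwarded to the binding in the code above.

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Explain KV caching in one sentence."},
]
prompt = impl.apply_chat_template(messages)  # returns the templated prompt string
print(impl.generate(prompt))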
+     def generate_stream(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> Generator[str, None, None]:
+         """Generate text with streaming."""
+         token_queue = queue.Queue()
+         exception_container = [None]
+         self.reset_cancel()  # Reset cancel flag before generation
+ 
+         def on_token(token: str, user_data) -> bool:
+             if self._cancel_event.is_set():
+                 token_queue.put(('end', None))
+                 return False  # Stop generation
+             try:
+                 token_queue.put(('token', token))
+                 return True  # Continue generation
+             except Exception as e:
+                 exception_container[0] = e
+                 return False  # Stop generation
+ 
+         config = self._convert_generation_config(g_cfg)
+ 
+         # Run generation in thread
+         def generate():
+             try:
+                 result = llm_bind.ml_llm_generate(
+                     handle=self._handle,
+                     prompt=prompt,
+                     config=config,
+                     on_token=on_token,
+                     user_data=None
+                 )
+                 self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
+             except Exception as e:
+                 exception_container[0] = e
+             finally:
+                 token_queue.put(('end', None))
+ 
+         thread = threading.Thread(target=generate)
+         thread.start()
+ 
+         # Yield tokens as they come
+         try:
+             while True:
+                 msg_type, token = token_queue.get()
+                 if msg_type == 'token':
+                     yield token
+                 elif msg_type in ('error', 'end'):
+                     break
+         finally:
+             thread.join()
+ 
+         if exception_container[0]:
+             raise exception_container[0]
+ 
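Consuming the stream is a plain for-loop over the generator; tokens arrive through the internal queue as the worker thread produces them. To stop early, the cancel event checked in `on_token` has to be set, since the generator's cleanup otherwise joins the worker until generation finishes. A sketch, reusing `impl` and `prompt` from the earlier examples:

for token in impl.generate_stream(prompt):
    print(token, end="", flush=True)
print()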
+     def generate(self, prompt: str, g_cfg: GenerationConfig = GenerationConfig()) -> str:
+         """
+         Generate text without streaming.
+ 
+         Args:
+             prompt (str): The prompt to generate text from. For chat models, this is the chat messages after the chat template has been applied.
+             g_cfg (GenerationConfig): Generation configuration.
+ 
+         Returns:
+             str: The generated text.
+         """
+         config = self._convert_generation_config(g_cfg)
+         result = llm_bind.ml_llm_generate(
+             handle=self._handle,
+             prompt=prompt,
+             config=config,
+             on_token=None,  # No callback for non-streaming
+             user_data=None
+         )
+ 
+         self._profiling_data = ProfilingData.from_dict(result.get("profile_data", {}))
+         return result.get("text", "")
+ 
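For a single blocking call, `generate` returns the full text and stores the binding's profiling data, which `get_profiling_data` exposes afterwards. The exact ProfilingData fields are defined in nexaai.base, so this sketch just prints the object:

text = impl.generate(prompt)
print(text)
print(impl.get_profiling_data())  # ProfilingData from the last call, or None if nothing was run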
+     def get_profiling_data(self) -> Optional[ProfilingData]:
+         """Get profiling data from the last generation."""
+         return self._profiling_data
+ 
+     def save_kv_cache(self, path: str):
+         """
+         Save the key-value cache to the file.
+ 
+         Args:
+             path (str): The path to the file.
+         """
+         llm_bind.ml_llm_save_kv_cache(self._handle, path)
+ 
+     def load_kv_cache(self, path: str):
+         """
+         Load the key-value cache from the file.
+ 
+         Args:
+             path (str): The path to the file.
+         """
+         llm_bind.ml_llm_load_kv_cache(self._handle, path)
+ 
+     def reset(self):
+         """
+         Reset the LLM model context and KV cache. If the context is not reset, the model assumes the previously evaluated tokens are still valid and treats only the tokens after them as new incremental input.
+         If your past chat history has changed, or you are starting a new chat, always reset the model before calling generate.
+         """
+         llm_bind.ml_llm_reset(self._handle)
+ 
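Because the context is incremental (see the docstring above), a safe multi-turn pattern is to rebuild the full prompt from the current history and reset before any generation whose prefix differs from what the model last saw. A sketch:

history = [
    {"role": "user", "content": "Hi!"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Summarize our chat so far."},
]
impl.reset()  # the prefix changed relative to the previous call, so drop the old KV state
reply = impl.generate(impl.apply_chat_template(history))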
+     def _convert_generation_config(self, g_cfg: GenerationConfig):
+         """Convert GenerationConfig to binding format."""
+         config = common_bind.GenerationConfig()
+ 
+         # Set basic generation parameters
+         config.max_tokens = g_cfg.max_tokens
+ 
+         if g_cfg.stop_words:
+             config.stop = g_cfg.stop_words
+ 
+         if g_cfg.image_paths:
+             config.image_paths = g_cfg.image_paths
+ 
+         if g_cfg.audio_paths:
+             config.audio_paths = g_cfg.audio_paths
+ 
+         if g_cfg.sampler_config:
+             sampler = common_bind.SamplerConfig()
+             sampler.temperature = g_cfg.sampler_config.temperature
+             sampler.top_p = g_cfg.sampler_config.top_p
+             sampler.top_k = g_cfg.sampler_config.top_k
+             sampler.repetition_penalty = g_cfg.sampler_config.repetition_penalty
+             sampler.presence_penalty = g_cfg.sampler_config.presence_penalty
+             sampler.frequency_penalty = g_cfg.sampler_config.frequency_penalty
+             sampler.seed = g_cfg.sampler_config.seed
+ 
+             if g_cfg.sampler_config.grammar_path:
+                 sampler.grammar_path = g_cfg.sampler_config.grammar_path
+ 
+             if g_cfg.sampler_config.grammar_string:
+                 sampler.grammar_string = g_cfg.sampler_config.grammar_string
+ 
+             config.sampler_config = sampler
+ 
+         return config
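Putting the configuration path together: the sketch below assumes GenerationConfig and SamplerConfig (taken here from nexaai.common alongside the other config types, which is an assumption) accept their fields as keyword arguments; if not, construct them with defaults and assign the attributes referenced above. The grammar string is illustrative GBNF for the llama.cpp plugin.

from nexaai.common import GenerationConfig, SamplerConfig  # SamplerConfig location is assumed

g_cfg = GenerationConfig(
    max_tokens=256,
    stop_words=["</s>"],
    sampler_config=SamplerConfig(
        temperature=0.7,
        top_p=0.9,
        top_k=40,
        seed=42,
        grammar_string='root ::= "yes" | "no"',  # illustrative GBNF constraint
    ),
)
print(impl.generate(prompt, g_cfg))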