nexaai 1.0.16rc13__cp310-cp310-macosx_15_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Files changed (557)
  1. nexaai/__init__.py +83 -0
  2. nexaai/_stub.cpython-310-darwin.so +0 -0
  3. nexaai/_version.py +4 -0
  4. nexaai/asr.py +64 -0
  5. nexaai/asr_impl/__init__.py +0 -0
  6. nexaai/asr_impl/mlx_asr_impl.py +92 -0
  7. nexaai/asr_impl/pybind_asr_impl.py +44 -0
  8. nexaai/base.py +39 -0
  9. nexaai/binds/__init__.py +4 -0
  10. nexaai/binds/common_bind.cpython-310-darwin.so +0 -0
  11. nexaai/binds/embedder_bind.cpython-310-darwin.so +0 -0
  12. nexaai/binds/libnexa_bridge.dylib +0 -0
  13. nexaai/binds/llm_bind.cpython-310-darwin.so +0 -0
  14. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  15. nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
  16. nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
  17. nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
  18. nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
  19. nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
  20. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  21. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  22. nexaai/binds/nexa_mlx/py-lib/ml.py +888 -0
  23. nexaai/binds/nexa_mlx/py-lib/mlx_audio/__init__.py +0 -0
  24. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/__init__.py +1 -0
  25. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/__init__.py +5 -0
  26. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
  27. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/activation.py +51 -0
  28. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/amp.py +96 -0
  29. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
  30. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/conv.py +114 -0
  31. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/bigvgan/resample.py +177 -0
  32. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/__init__.py +1 -0
  33. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/base.py +228 -0
  34. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/dac.py +285 -0
  35. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
  36. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/layers.py +129 -0
  37. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
  38. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/__init__.py +1 -0
  39. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/encodec/encodec.py +777 -0
  40. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/__init__.py +1 -0
  41. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/mimi.py +286 -0
  42. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
  43. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
  44. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
  45. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
  46. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
  47. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
  48. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/__init__.py +1 -0
  49. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model.py +260 -0
  50. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/model_v2.py +383 -0
  51. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/s3/utils.py +122 -0
  52. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/__init__.py +1 -0
  53. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/attention.py +97 -0
  54. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/layers.py +306 -0
  55. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/snac.py +154 -0
  56. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/snac/vq.py +135 -0
  57. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/__init__.py +1 -0
  58. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/mel.py +33 -0
  59. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/models/vocos/vocos.py +359 -0
  60. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/__init__.py +0 -0
  61. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_bigvgan.py +54 -0
  62. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_descript.py +109 -0
  63. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_encodec.py +58 -0
  64. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_mimi.py +22 -0
  65. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_s3.py +25 -0
  66. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_snac.py +40 -0
  67. nexaai/binds/nexa_mlx/py-lib/mlx_audio/codec/tests/test_vocos.py +93 -0
  68. nexaai/binds/nexa_mlx/py-lib/mlx_audio/server.py +525 -0
  69. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/__init__.py +0 -0
  70. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
  71. nexaai/binds/nexa_mlx/py-lib/mlx_audio/sts/voice_pipeline.py +327 -0
  72. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/__init__.py +0 -0
  73. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/generate.py +174 -0
  74. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/__init__.py +0 -0
  75. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/__init__.py +1 -0
  76. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/alignment.py +248 -0
  77. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/attention.py +187 -0
  78. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/audio.py +76 -0
  79. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/conformer.py +331 -0
  80. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/ctc.py +34 -0
  81. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
  82. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
  83. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
  84. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
  85. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
  86. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/__init__.py +1 -0
  87. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/audio.py +82 -0
  88. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/decoding.py +742 -0
  89. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/timing.py +329 -0
  90. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
  91. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/whisper.py +862 -0
  92. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/models/whisper/writers.py +268 -0
  93. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/tests/test_models.py +381 -0
  94. nexaai/binds/nexa_mlx/py-lib/mlx_audio/stt/utils.py +195 -0
  95. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/__init__.py +1 -0
  96. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/audio_player.py +120 -0
  97. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/convert.py +71 -0
  98. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/generate.py +449 -0
  99. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/__init__.py +0 -0
  100. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/__init__.py +4 -0
  101. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/bark.py +528 -0
  102. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/isftnet.py +12 -0
  103. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/bark/pipeline.py +442 -0
  104. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/base.py +84 -0
  105. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/__init__.py +1 -0
  106. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/audio.py +287 -0
  107. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/config.py +256 -0
  108. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/dia.py +592 -0
  109. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/dia/layers.py +870 -0
  110. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/__init__.py +3 -0
  111. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/attention.py +180 -0
  112. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
  113. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/conformer.py +247 -0
  114. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
  115. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
  116. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
  117. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
  118. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
  119. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/gpt2.py +38 -0
  120. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/indextts.py +412 -0
  121. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/mel.py +37 -0
  122. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/normalize.py +294 -0
  123. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/indextts/perceiver.py +62 -0
  124. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/interpolate.py +108 -0
  125. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/__init__.py +4 -0
  126. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
  127. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
  128. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/modules.py +659 -0
  129. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
  130. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/kokoro/voice.py +113 -0
  131. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/__init__.py +3 -0
  132. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/llama/llama.py +324 -0
  133. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/__init__.py +1 -0
  134. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
  135. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
  136. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/outetts.py +255 -0
  137. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
  138. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/outetts/tokens.py +36 -0
  139. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/__init__.py +3 -0
  140. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/attention.py +195 -0
  141. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/sesame.py +633 -0
  142. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/sesame/watermarking.py +105 -0
  143. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/__init__.py +1 -0
  144. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
  145. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/bicodec.py +269 -0
  146. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
  147. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
  148. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
  149. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
  150. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
  151. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
  152. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
  153. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
  154. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual.py +209 -0
  155. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
  156. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
  157. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
  158. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
  159. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
  160. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
  161. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/spark.py +382 -0
  162. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/audio.py +220 -0
  163. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/file.py +221 -0
  164. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
  165. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/__init__.py +0 -0
  166. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_base.py +66 -0
  167. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_convert.py +173 -0
  168. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_interpolate.py +88 -0
  169. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/tests/test_models.py +974 -0
  170. nexaai/binds/nexa_mlx/py-lib/mlx_audio/tts/utils.py +337 -0
  171. nexaai/binds/nexa_mlx/py-lib/mlx_audio/utils.py +237 -0
  172. nexaai/binds/nexa_mlx/py-lib/mlx_audio/version.py +1 -0
  173. nexaai/binds/nexa_mlx/py-lib/profiling.py +239 -0
  174. nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
  175. nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
  176. nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
  177. nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
  178. nexaai/binds/nexa_nexaml/libnexa-mm-process.dylib +0 -0
  179. nexaai/binds/nexa_nexaml/libnexa-sampling.dylib +0 -0
  180. nexaai/binds/nexa_nexaml/libnexa_plugin.dylib +0 -0
  181. nexaai/binds/nexa_nexaml/libnexaproc.dylib +0 -0
  182. nexaai/binds/nexa_nexaml/libqwen3-vl.dylib +0 -0
  183. nexaai/binds/nexa_nexaml/libqwen3vl-vision.dylib +0 -0
  184. nexaai/binds/vlm_bind.cpython-310-darwin.so +0 -0
  185. nexaai/common.py +104 -0
  186. nexaai/cv.py +92 -0
  187. nexaai/cv_impl/__init__.py +0 -0
  188. nexaai/cv_impl/mlx_cv_impl.py +89 -0
  189. nexaai/cv_impl/pybind_cv_impl.py +32 -0
  190. nexaai/embedder.py +72 -0
  191. nexaai/embedder_impl/__init__.py +0 -0
  192. nexaai/embedder_impl/mlx_embedder_impl.py +116 -0
  193. nexaai/embedder_impl/pybind_embedder_impl.py +95 -0
  194. nexaai/image_gen.py +140 -0
  195. nexaai/image_gen_impl/__init__.py +0 -0
  196. nexaai/image_gen_impl/mlx_image_gen_impl.py +292 -0
  197. nexaai/image_gen_impl/pybind_image_gen_impl.py +85 -0
  198. nexaai/llm.py +96 -0
  199. nexaai/llm_impl/__init__.py +0 -0
  200. nexaai/llm_impl/mlx_llm_impl.py +269 -0
  201. nexaai/llm_impl/pybind_llm_impl.py +218 -0
  202. nexaai/log.py +92 -0
  203. nexaai/mlx_backend/asr/__init__.py +12 -0
  204. nexaai/mlx_backend/asr/interface.py +122 -0
  205. nexaai/mlx_backend/common/__init__.py +0 -0
  206. nexaai/mlx_backend/common/utils.py +25 -0
  207. nexaai/mlx_backend/cv/__init__.py +0 -0
  208. nexaai/mlx_backend/cv/generate.py +195 -0
  209. nexaai/mlx_backend/cv/interface.py +151 -0
  210. nexaai/mlx_backend/cv/main.py +81 -0
  211. nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py +1736 -0
  212. nexaai/mlx_backend/embedding/__init__.py +0 -0
  213. nexaai/mlx_backend/embedding/generate.py +333 -0
  214. nexaai/mlx_backend/embedding/interface.py +617 -0
  215. nexaai/mlx_backend/embedding/main.py +173 -0
  216. nexaai/mlx_backend/embedding/modeling/__init__.py +0 -0
  217. nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py +399 -0
  218. nexaai/mlx_backend/image_gen/__init__.py +1 -0
  219. nexaai/mlx_backend/image_gen/generate_sd.py +244 -0
  220. nexaai/mlx_backend/image_gen/interface.py +82 -0
  221. nexaai/mlx_backend/image_gen/main.py +281 -0
  222. nexaai/mlx_backend/image_gen/stable_diffusion/__init__.py +306 -0
  223. nexaai/mlx_backend/image_gen/stable_diffusion/clip.py +116 -0
  224. nexaai/mlx_backend/image_gen/stable_diffusion/config.py +65 -0
  225. nexaai/mlx_backend/image_gen/stable_diffusion/model_io.py +386 -0
  226. nexaai/mlx_backend/image_gen/stable_diffusion/sampler.py +105 -0
  227. nexaai/mlx_backend/image_gen/stable_diffusion/tokenizer.py +100 -0
  228. nexaai/mlx_backend/image_gen/stable_diffusion/unet.py +460 -0
  229. nexaai/mlx_backend/image_gen/stable_diffusion/vae.py +274 -0
  230. nexaai/mlx_backend/llm/__init__.py +0 -0
  231. nexaai/mlx_backend/llm/generate.py +149 -0
  232. nexaai/mlx_backend/llm/interface.py +764 -0
  233. nexaai/mlx_backend/llm/main.py +68 -0
  234. nexaai/mlx_backend/ml.py +888 -0
  235. nexaai/mlx_backend/mlx_audio/__init__.py +0 -0
  236. nexaai/mlx_backend/mlx_audio/codec/__init__.py +1 -0
  237. nexaai/mlx_backend/mlx_audio/codec/models/__init__.py +5 -0
  238. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/__init__.py +1 -0
  239. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/activation.py +51 -0
  240. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/amp.py +96 -0
  241. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/bigvgan.py +149 -0
  242. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/conv.py +114 -0
  243. nexaai/mlx_backend/mlx_audio/codec/models/bigvgan/resample.py +177 -0
  244. nexaai/mlx_backend/mlx_audio/codec/models/descript/__init__.py +1 -0
  245. nexaai/mlx_backend/mlx_audio/codec/models/descript/base.py +228 -0
  246. nexaai/mlx_backend/mlx_audio/codec/models/descript/dac.py +285 -0
  247. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/__init__.py +1 -0
  248. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/layers.py +129 -0
  249. nexaai/mlx_backend/mlx_audio/codec/models/descript/nn/quantize.py +149 -0
  250. nexaai/mlx_backend/mlx_audio/codec/models/encodec/__init__.py +1 -0
  251. nexaai/mlx_backend/mlx_audio/codec/models/encodec/encodec.py +777 -0
  252. nexaai/mlx_backend/mlx_audio/codec/models/mimi/__init__.py +1 -0
  253. nexaai/mlx_backend/mlx_audio/codec/models/mimi/mimi.py +286 -0
  254. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/__init__.py +20 -0
  255. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/conv.py +398 -0
  256. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/kv_cache.py +199 -0
  257. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/quantization.py +179 -0
  258. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/seanet.py +314 -0
  259. nexaai/mlx_backend/mlx_audio/codec/models/mimi/modules/transformer.py +256 -0
  260. nexaai/mlx_backend/mlx_audio/codec/models/s3/__init__.py +1 -0
  261. nexaai/mlx_backend/mlx_audio/codec/models/s3/model.py +260 -0
  262. nexaai/mlx_backend/mlx_audio/codec/models/s3/model_v2.py +383 -0
  263. nexaai/mlx_backend/mlx_audio/codec/models/s3/utils.py +122 -0
  264. nexaai/mlx_backend/mlx_audio/codec/models/snac/__init__.py +1 -0
  265. nexaai/mlx_backend/mlx_audio/codec/models/snac/attention.py +97 -0
  266. nexaai/mlx_backend/mlx_audio/codec/models/snac/layers.py +306 -0
  267. nexaai/mlx_backend/mlx_audio/codec/models/snac/snac.py +154 -0
  268. nexaai/mlx_backend/mlx_audio/codec/models/snac/vq.py +135 -0
  269. nexaai/mlx_backend/mlx_audio/codec/models/vocos/__init__.py +1 -0
  270. nexaai/mlx_backend/mlx_audio/codec/models/vocos/mel.py +33 -0
  271. nexaai/mlx_backend/mlx_audio/codec/models/vocos/vocos.py +359 -0
  272. nexaai/mlx_backend/mlx_audio/codec/tests/__init__.py +0 -0
  273. nexaai/mlx_backend/mlx_audio/codec/tests/test_bigvgan.py +54 -0
  274. nexaai/mlx_backend/mlx_audio/codec/tests/test_descript.py +109 -0
  275. nexaai/mlx_backend/mlx_audio/codec/tests/test_encodec.py +58 -0
  276. nexaai/mlx_backend/mlx_audio/codec/tests/test_mimi.py +22 -0
  277. nexaai/mlx_backend/mlx_audio/codec/tests/test_s3.py +25 -0
  278. nexaai/mlx_backend/mlx_audio/codec/tests/test_snac.py +40 -0
  279. nexaai/mlx_backend/mlx_audio/codec/tests/test_vocos.py +93 -0
  280. nexaai/mlx_backend/mlx_audio/server.py +525 -0
  281. nexaai/mlx_backend/mlx_audio/sts/__init__.py +0 -0
  282. nexaai/mlx_backend/mlx_audio/sts/tests/test_voice_pipeline.py +156 -0
  283. nexaai/mlx_backend/mlx_audio/sts/voice_pipeline.py +327 -0
  284. nexaai/mlx_backend/mlx_audio/stt/__init__.py +0 -0
  285. nexaai/mlx_backend/mlx_audio/stt/generate.py +174 -0
  286. nexaai/mlx_backend/mlx_audio/stt/models/__init__.py +0 -0
  287. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/__init__.py +1 -0
  288. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/alignment.py +248 -0
  289. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/attention.py +187 -0
  290. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/audio.py +76 -0
  291. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/conformer.py +331 -0
  292. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/ctc.py +34 -0
  293. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/parakeet.py +604 -0
  294. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/rnnt.py +157 -0
  295. nexaai/mlx_backend/mlx_audio/stt/models/parakeet/tokenizer.py +2 -0
  296. nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/feature_extractor.py +757 -0
  297. nexaai/mlx_backend/mlx_audio/stt/models/wav2vec/wav2vec.py +738 -0
  298. nexaai/mlx_backend/mlx_audio/stt/models/whisper/__init__.py +1 -0
  299. nexaai/mlx_backend/mlx_audio/stt/models/whisper/audio.py +82 -0
  300. nexaai/mlx_backend/mlx_audio/stt/models/whisper/decoding.py +742 -0
  301. nexaai/mlx_backend/mlx_audio/stt/models/whisper/timing.py +329 -0
  302. nexaai/mlx_backend/mlx_audio/stt/models/whisper/tokenizer.py +398 -0
  303. nexaai/mlx_backend/mlx_audio/stt/models/whisper/whisper.py +862 -0
  304. nexaai/mlx_backend/mlx_audio/stt/models/whisper/writers.py +268 -0
  305. nexaai/mlx_backend/mlx_audio/stt/tests/test_models.py +381 -0
  306. nexaai/mlx_backend/mlx_audio/stt/utils.py +195 -0
  307. nexaai/mlx_backend/mlx_audio/tts/__init__.py +1 -0
  308. nexaai/mlx_backend/mlx_audio/tts/audio_player.py +120 -0
  309. nexaai/mlx_backend/mlx_audio/tts/convert.py +71 -0
  310. nexaai/mlx_backend/mlx_audio/tts/generate.py +449 -0
  311. nexaai/mlx_backend/mlx_audio/tts/models/__init__.py +0 -0
  312. nexaai/mlx_backend/mlx_audio/tts/models/bark/__init__.py +4 -0
  313. nexaai/mlx_backend/mlx_audio/tts/models/bark/bark.py +528 -0
  314. nexaai/mlx_backend/mlx_audio/tts/models/bark/isftnet.py +12 -0
  315. nexaai/mlx_backend/mlx_audio/tts/models/bark/pipeline.py +442 -0
  316. nexaai/mlx_backend/mlx_audio/tts/models/base.py +84 -0
  317. nexaai/mlx_backend/mlx_audio/tts/models/dia/__init__.py +1 -0
  318. nexaai/mlx_backend/mlx_audio/tts/models/dia/audio.py +287 -0
  319. nexaai/mlx_backend/mlx_audio/tts/models/dia/config.py +256 -0
  320. nexaai/mlx_backend/mlx_audio/tts/models/dia/dia.py +592 -0
  321. nexaai/mlx_backend/mlx_audio/tts/models/dia/layers.py +870 -0
  322. nexaai/mlx_backend/mlx_audio/tts/models/indextts/__init__.py +3 -0
  323. nexaai/mlx_backend/mlx_audio/tts/models/indextts/attention.py +180 -0
  324. nexaai/mlx_backend/mlx_audio/tts/models/indextts/bigvgan.py +124 -0
  325. nexaai/mlx_backend/mlx_audio/tts/models/indextts/conformer.py +247 -0
  326. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/__init__.py +0 -0
  327. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/asp.py +59 -0
  328. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/ecapa_tdnn.py +91 -0
  329. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/se_res2net.py +132 -0
  330. nexaai/mlx_backend/mlx_audio/tts/models/indextts/ecapa_tdnn/tdnn.py +42 -0
  331. nexaai/mlx_backend/mlx_audio/tts/models/indextts/gpt2.py +38 -0
  332. nexaai/mlx_backend/mlx_audio/tts/models/indextts/indextts.py +412 -0
  333. nexaai/mlx_backend/mlx_audio/tts/models/indextts/mel.py +37 -0
  334. nexaai/mlx_backend/mlx_audio/tts/models/indextts/normalize.py +294 -0
  335. nexaai/mlx_backend/mlx_audio/tts/models/indextts/perceiver.py +62 -0
  336. nexaai/mlx_backend/mlx_audio/tts/models/interpolate.py +108 -0
  337. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/__init__.py +4 -0
  338. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/istftnet.py +979 -0
  339. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/kokoro.py +331 -0
  340. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/modules.py +659 -0
  341. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/pipeline.py +453 -0
  342. nexaai/mlx_backend/mlx_audio/tts/models/kokoro/voice.py +113 -0
  343. nexaai/mlx_backend/mlx_audio/tts/models/llama/__init__.py +3 -0
  344. nexaai/mlx_backend/mlx_audio/tts/models/llama/llama.py +324 -0
  345. nexaai/mlx_backend/mlx_audio/tts/models/outetts/__init__.py +1 -0
  346. nexaai/mlx_backend/mlx_audio/tts/models/outetts/audio_processor.py +351 -0
  347. nexaai/mlx_backend/mlx_audio/tts/models/outetts/dac_interface.py +162 -0
  348. nexaai/mlx_backend/mlx_audio/tts/models/outetts/default_speaker.json +461 -0
  349. nexaai/mlx_backend/mlx_audio/tts/models/outetts/outetts.py +255 -0
  350. nexaai/mlx_backend/mlx_audio/tts/models/outetts/prompt_processor.py +181 -0
  351. nexaai/mlx_backend/mlx_audio/tts/models/outetts/tokens.py +36 -0
  352. nexaai/mlx_backend/mlx_audio/tts/models/sesame/__init__.py +3 -0
  353. nexaai/mlx_backend/mlx_audio/tts/models/sesame/attention.py +195 -0
  354. nexaai/mlx_backend/mlx_audio/tts/models/sesame/sesame.py +633 -0
  355. nexaai/mlx_backend/mlx_audio/tts/models/sesame/watermarking.py +105 -0
  356. nexaai/mlx_backend/mlx_audio/tts/models/spark/__init__.py +1 -0
  357. nexaai/mlx_backend/mlx_audio/tts/models/spark/audio_tokenizer.py +138 -0
  358. nexaai/mlx_backend/mlx_audio/tts/models/spark/bicodec.py +269 -0
  359. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/__init__.py +0 -0
  360. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/__init__.py +0 -0
  361. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/blocks/sampler.py +111 -0
  362. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/__init__.py +0 -0
  363. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_decoder.py +120 -0
  364. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/feat_encoder.py +136 -0
  365. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/encoder_decoder/wave_generator.py +113 -0
  366. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/finite_scalar_quantization.py +238 -0
  367. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual.py +209 -0
  368. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/residual_fsq.py +309 -0
  369. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/__init__.py +1 -0
  370. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/ecapa_tdnn.py +283 -0
  371. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/perceiver_encoder.py +326 -0
  372. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/pooling_layers.py +297 -0
  373. nexaai/mlx_backend/mlx_audio/tts/models/spark/modules/speaker/speaker_encoder.py +155 -0
  374. nexaai/mlx_backend/mlx_audio/tts/models/spark/spark.py +382 -0
  375. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/audio.py +220 -0
  376. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/file.py +221 -0
  377. nexaai/mlx_backend/mlx_audio/tts/models/spark/utils/token_parser.py +181 -0
  378. nexaai/mlx_backend/mlx_audio/tts/tests/__init__.py +0 -0
  379. nexaai/mlx_backend/mlx_audio/tts/tests/test_base.py +66 -0
  380. nexaai/mlx_backend/mlx_audio/tts/tests/test_convert.py +173 -0
  381. nexaai/mlx_backend/mlx_audio/tts/tests/test_interpolate.py +88 -0
  382. nexaai/mlx_backend/mlx_audio/tts/tests/test_models.py +974 -0
  383. nexaai/mlx_backend/mlx_audio/tts/utils.py +337 -0
  384. nexaai/mlx_backend/mlx_audio/utils.py +237 -0
  385. nexaai/mlx_backend/mlx_audio/version.py +1 -0
  386. nexaai/mlx_backend/profiling.py +239 -0
  387. nexaai/mlx_backend/rerank/__init__.py +0 -0
  388. nexaai/mlx_backend/rerank/generate.py +174 -0
  389. nexaai/mlx_backend/rerank/interface.py +287 -0
  390. nexaai/mlx_backend/rerank/main.py +127 -0
  391. nexaai/mlx_backend/rerank/modeling/__init__.py +0 -0
  392. nexaai/mlx_backend/rerank/modeling/nexa_jina_rerank.py +330 -0
  393. nexaai/mlx_backend/sd/__init__.py +1 -0
  394. nexaai/mlx_backend/sd/interface.py +362 -0
  395. nexaai/mlx_backend/sd/main.py +286 -0
  396. nexaai/mlx_backend/sd/modeling/__init__.py +306 -0
  397. nexaai/mlx_backend/sd/modeling/clip.py +116 -0
  398. nexaai/mlx_backend/sd/modeling/config.py +65 -0
  399. nexaai/mlx_backend/sd/modeling/model_io.py +385 -0
  400. nexaai/mlx_backend/sd/modeling/sampler.py +105 -0
  401. nexaai/mlx_backend/sd/modeling/tokenizer.py +100 -0
  402. nexaai/mlx_backend/sd/modeling/unet.py +460 -0
  403. nexaai/mlx_backend/sd/modeling/vae.py +274 -0
  404. nexaai/mlx_backend/tts/__init__.py +12 -0
  405. nexaai/mlx_backend/tts/interface.py +276 -0
  406. nexaai/mlx_backend/vlm/__init__.py +3 -0
  407. nexaai/mlx_backend/vlm/generate.py +572 -0
  408. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +261 -0
  409. nexaai/mlx_backend/vlm/interface.py +415 -0
  410. nexaai/mlx_backend/vlm/main.py +316 -0
  411. nexaai/mlx_backend/vlm/modeling/__init__.py +0 -0
  412. nexaai/mlx_backend/vlm/modeling/convert.py +68 -0
  413. nexaai/mlx_backend/vlm/modeling/models/__init__.py +0 -0
  414. nexaai/mlx_backend/vlm/modeling/models/aya_vision/__init__.py +8 -0
  415. nexaai/mlx_backend/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
  416. nexaai/mlx_backend/vlm/modeling/models/aya_vision/interpolate.py +186 -0
  417. nexaai/mlx_backend/vlm/modeling/models/aya_vision/language.py +233 -0
  418. nexaai/mlx_backend/vlm/modeling/models/aya_vision/vision.py +503 -0
  419. nexaai/mlx_backend/vlm/modeling/models/base.py +202 -0
  420. nexaai/mlx_backend/vlm/modeling/models/cache.py +230 -0
  421. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
  422. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
  423. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
  424. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
  425. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
  426. nexaai/mlx_backend/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
  427. nexaai/mlx_backend/vlm/modeling/models/florence2/__init__.py +8 -0
  428. nexaai/mlx_backend/vlm/modeling/models/florence2/florence2.py +366 -0
  429. nexaai/mlx_backend/vlm/modeling/models/florence2/language.py +488 -0
  430. nexaai/mlx_backend/vlm/modeling/models/florence2/vision.py +591 -0
  431. nexaai/mlx_backend/vlm/modeling/models/gemma3/__init__.py +8 -0
  432. nexaai/mlx_backend/vlm/modeling/models/gemma3/gemma3.py +213 -0
  433. nexaai/mlx_backend/vlm/modeling/models/gemma3/language.py +315 -0
  434. nexaai/mlx_backend/vlm/modeling/models/gemma3/vision.py +238 -0
  435. nexaai/mlx_backend/vlm/modeling/models/gemma3n/__init__.py +2 -0
  436. nexaai/mlx_backend/vlm/modeling/models/gemma3n/audio.py +1038 -0
  437. nexaai/mlx_backend/vlm/modeling/models/gemma3n/config.py +139 -0
  438. nexaai/mlx_backend/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
  439. nexaai/mlx_backend/vlm/modeling/models/gemma3n/language.py +629 -0
  440. nexaai/mlx_backend/vlm/modeling/models/gemma3n/vision.py +1022 -0
  441. nexaai/mlx_backend/vlm/modeling/models/idefics2/__init__.py +9 -0
  442. nexaai/mlx_backend/vlm/modeling/models/idefics2/idefics2.py +294 -0
  443. nexaai/mlx_backend/vlm/modeling/models/idefics2/language.py +191 -0
  444. nexaai/mlx_backend/vlm/modeling/models/idefics2/vision.py +267 -0
  445. nexaai/mlx_backend/vlm/modeling/models/idefics3/__init__.py +8 -0
  446. nexaai/mlx_backend/vlm/modeling/models/idefics3/idefics3.py +175 -0
  447. nexaai/mlx_backend/vlm/modeling/models/idefics3/language.py +192 -0
  448. nexaai/mlx_backend/vlm/modeling/models/idefics3/vision.py +233 -0
  449. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/__init__.py +9 -0
  450. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
  451. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/language.py +220 -0
  452. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/processor.py +393 -0
  453. nexaai/mlx_backend/vlm/modeling/models/internvl_chat/vision.py +293 -0
  454. nexaai/mlx_backend/vlm/modeling/models/kernels.py +307 -0
  455. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/__init__.py +8 -0
  456. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
  457. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/language.py +509 -0
  458. nexaai/mlx_backend/vlm/modeling/models/kimi_vl/vision.py +522 -0
  459. nexaai/mlx_backend/vlm/modeling/models/llama4/__init__.py +8 -0
  460. nexaai/mlx_backend/vlm/modeling/models/llama4/language.py +386 -0
  461. nexaai/mlx_backend/vlm/modeling/models/llama4/llama4.py +138 -0
  462. nexaai/mlx_backend/vlm/modeling/models/llama4/vision.py +560 -0
  463. nexaai/mlx_backend/vlm/modeling/models/llava/__init__.py +8 -0
  464. nexaai/mlx_backend/vlm/modeling/models/llava/language.py +240 -0
  465. nexaai/mlx_backend/vlm/modeling/models/llava/llava.py +153 -0
  466. nexaai/mlx_backend/vlm/modeling/models/llava/vision.py +259 -0
  467. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/__init__.py +9 -0
  468. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/language.py +236 -0
  469. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
  470. nexaai/mlx_backend/vlm/modeling/models/llava_bunny/vision.py +303 -0
  471. nexaai/mlx_backend/vlm/modeling/models/llava_next/__init__.py +8 -0
  472. nexaai/mlx_backend/vlm/modeling/models/llava_next/language.py +230 -0
  473. nexaai/mlx_backend/vlm/modeling/models/llava_next/llava_next.py +160 -0
  474. nexaai/mlx_backend/vlm/modeling/models/llava_next/vision.py +243 -0
  475. nexaai/mlx_backend/vlm/modeling/models/mistral3/__init__.py +8 -0
  476. nexaai/mlx_backend/vlm/modeling/models/mistral3/mistral3.py +283 -0
  477. nexaai/mlx_backend/vlm/modeling/models/mllama/__init__.py +8 -0
  478. nexaai/mlx_backend/vlm/modeling/models/mllama/language.py +416 -0
  479. nexaai/mlx_backend/vlm/modeling/models/mllama/mllama.py +172 -0
  480. nexaai/mlx_backend/vlm/modeling/models/mllama/vision.py +499 -0
  481. nexaai/mlx_backend/vlm/modeling/models/molmo/__init__.py +8 -0
  482. nexaai/mlx_backend/vlm/modeling/models/molmo/language.py +243 -0
  483. nexaai/mlx_backend/vlm/modeling/models/molmo/molmo.py +133 -0
  484. nexaai/mlx_backend/vlm/modeling/models/molmo/vision.py +465 -0
  485. nexaai/mlx_backend/vlm/modeling/models/multi_modality/__init__.py +10 -0
  486. nexaai/mlx_backend/vlm/modeling/models/multi_modality/language.py +230 -0
  487. nexaai/mlx_backend/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
  488. nexaai/mlx_backend/vlm/modeling/models/multi_modality/sam.py +557 -0
  489. nexaai/mlx_backend/vlm/modeling/models/multi_modality/vision.py +526 -0
  490. nexaai/mlx_backend/vlm/modeling/models/paligemma/__init__.py +8 -0
  491. nexaai/mlx_backend/vlm/modeling/models/paligemma/language.py +282 -0
  492. nexaai/mlx_backend/vlm/modeling/models/paligemma/paligemma.py +160 -0
  493. nexaai/mlx_backend/vlm/modeling/models/paligemma/vision.py +242 -0
  494. nexaai/mlx_backend/vlm/modeling/models/phi3_v/__init__.py +8 -0
  495. nexaai/mlx_backend/vlm/modeling/models/phi3_v/language.py +21 -0
  496. nexaai/mlx_backend/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
  497. nexaai/mlx_backend/vlm/modeling/models/phi3_v/su_rope.py +71 -0
  498. nexaai/mlx_backend/vlm/modeling/models/phi3_v/vision.py +324 -0
  499. nexaai/mlx_backend/vlm/modeling/models/pixtral/__init__.py +8 -0
  500. nexaai/mlx_backend/vlm/modeling/models/pixtral/language.py +229 -0
  501. nexaai/mlx_backend/vlm/modeling/models/pixtral/pixtral.py +161 -0
  502. nexaai/mlx_backend/vlm/modeling/models/pixtral/vision.py +320 -0
  503. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
  504. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
  505. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
  506. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
  507. nexaai/mlx_backend/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
  508. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
  509. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/config.py +104 -0
  510. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/language.py +490 -0
  511. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
  512. nexaai/mlx_backend/vlm/modeling/models/qwen2_vl/vision.py +312 -0
  513. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  514. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
  515. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
  516. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
  517. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
  518. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
  519. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
  520. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/processor.py +476 -0
  521. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
  522. nexaai/mlx_backend/vlm/modeling/models/smolvlm/__init__.py +8 -0
  523. nexaai/mlx_backend/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
  524. nexaai/mlx_backend/vlm/modeling/processing_qwen2_5_vl.py +209 -0
  525. nexaai/mlx_backend/vlm/modeling/processing_qwen2_vl.py +215 -0
  526. nexaai/mlx_backend/vlm/modeling/prompt_utils.py +474 -0
  527. nexaai/mlx_backend/vlm/modeling/sample_utils.py +39 -0
  528. nexaai/mlx_backend/vlm/modeling/tokenizer_utils.py +344 -0
  529. nexaai/mlx_backend/vlm/modeling/trainer/__init__.py +9 -0
  530. nexaai/mlx_backend/vlm/modeling/trainer/lora.py +70 -0
  531. nexaai/mlx_backend/vlm/modeling/trainer/trainer.py +296 -0
  532. nexaai/mlx_backend/vlm/modeling/trainer/utils.py +160 -0
  533. nexaai/mlx_backend/vlm/modeling/utils.py +928 -0
  534. nexaai/rerank.py +55 -0
  535. nexaai/rerank_impl/__init__.py +0 -0
  536. nexaai/rerank_impl/mlx_rerank_impl.py +92 -0
  537. nexaai/rerank_impl/pybind_rerank_impl.py +43 -0
  538. nexaai/runtime.py +68 -0
  539. nexaai/tts.py +74 -0
  540. nexaai/tts_impl/__init__.py +0 -0
  541. nexaai/tts_impl/mlx_tts_impl.py +94 -0
  542. nexaai/tts_impl/pybind_tts_impl.py +43 -0
  543. nexaai/utils/avatar_fetcher.py +104 -0
  544. nexaai/utils/decode.py +18 -0
  545. nexaai/utils/manifest_utils.py +324 -0
  546. nexaai/utils/model_manager.py +1353 -0
  547. nexaai/utils/model_types.py +47 -0
  548. nexaai/utils/progress_tracker.py +385 -0
  549. nexaai/utils/quantization_utils.py +245 -0
  550. nexaai/vlm.py +128 -0
  551. nexaai/vlm_impl/__init__.py +0 -0
  552. nexaai/vlm_impl/mlx_vlm_impl.py +258 -0
  553. nexaai/vlm_impl/pybind_vlm_impl.py +230 -0
  554. nexaai-1.0.16rc13.dist-info/METADATA +32 -0
  555. nexaai-1.0.16rc13.dist-info/RECORD +557 -0
  556. nexaai-1.0.16rc13.dist-info/WHEEL +5 -0
  557. nexaai-1.0.16rc13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,239 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from dataclasses import dataclass, field
5
+ from typing import Any, Optional
6
+ from enum import IntEnum
7
+
8
+ # --------------------------------------------------------------------------------------
9
+ # Stop reason constants matching profile.h
10
+ # --------------------------------------------------------------------------------------
11
+
12
class StopReason(IntEnum):
    """Integer codes recording why an operation stopped.

    The numeric values are meant to mirror the constants declared in
    ``profile.h``, so they must not be renumbered.
    """

    ML_STOP_REASON_UNKNOWN = 0  # default before any reason has been set
    ML_STOP_REASON_EOS = 1
    ML_STOP_REASON_LENGTH = 2
    ML_STOP_REASON_USER = 3
    ML_STOP_REASON_STOP_SEQUENCE = 4
    ML_STOP_REASON_COMPLETED = 5
20
+
21
+ # --------------------------------------------------------------------------------------
22
+ # Profiling data structure
23
+ # --------------------------------------------------------------------------------------
24
+
25
@dataclass
class ProfilingData:
    """Performance metrics computed for one profiled operation.

    All durations are stored in microseconds. Every field has a zero-like
    default, so a freshly constructed instance represents "no data".
    """

    # Durations (microseconds).
    ttft_us: int = 0            # time to first token
    total_time_us: int = 0      # total generation time
    prompt_time_us: int = 0     # prompt processing time
    decode_time_us: int = 0     # token generation time
    # Throughput.
    tokens_per_second: float = 0.0  # decoding speed
    # Token counters.
    total_tokens: int = 0       # prompt_tokens + generated_tokens
    prompt_tokens: int = 0
    generated_tokens: int = 0
    # Numeric stop reason (a StopReason value).
    stop_reason: int = StopReason.ML_STOP_REASON_UNKNOWN

    def reset(self):
        """Restore every metric to its default value in place."""
        self.ttft_us = self.total_time_us = 0
        self.prompt_time_us = self.decode_time_us = 0
        self.tokens_per_second = 0.0
        self.total_tokens = self.prompt_tokens = self.generated_tokens = 0
        self.stop_reason = StopReason.ML_STOP_REASON_UNKNOWN
49
+
50
+ # --------------------------------------------------------------------------------------
51
+ # Profiling context (similar to ml_ProfilingContext in profile.h)
52
+ # --------------------------------------------------------------------------------------
53
+
54
@dataclass
class ProfilingContext:
    """Raw timestamps and counters collected while an operation runs.

    The timestamp fields hold ``time.perf_counter()`` readings written by the
    ``profiling_*`` helpers, or ``None`` when the event has not happened yet.
    """

    # Timestamps (perf_counter seconds); None means "not recorded".
    start_time: Optional[float] = None
    prompt_start_time: Optional[float] = None
    prompt_end_time: Optional[float] = None
    decode_start_time: Optional[float] = None
    decode_end_time: Optional[float] = None
    first_token_time: Optional[float] = None
    end_time: Optional[float] = None

    # State and counters.
    ttft_recorded: bool = False  # True once first_token_time has been captured
    stop_reason: int = StopReason.ML_STOP_REASON_UNKNOWN
    prompt_tokens: int = 0
    generated_tokens: int = 0

    def reset(self):
        """Clear all timestamps and put state/counters back to their defaults."""
        self.start_time = self.end_time = None
        self.prompt_start_time = self.prompt_end_time = None
        self.decode_start_time = self.decode_end_time = None
        self.first_token_time = None
        self.ttft_recorded = False
        self.stop_reason = StopReason.ML_STOP_REASON_UNKNOWN
        self.prompt_tokens = self.generated_tokens = 0
83
+
84
+ # --------------------------------------------------------------------------------------
85
+ # Profiling functions (similar to profile.h functions)
86
+ # --------------------------------------------------------------------------------------
87
+
88
def profiling_reset(ctx: ProfilingContext) -> None:
    """Clear ``ctx`` by delegating to its ``reset()`` method (ml_profiling_reset)."""
    ctx.reset()
91
+
92
def profiling_start(ctx: ProfilingContext) -> None:
    """Mark the start of a profiled operation (ml_profiling_start).

    A single clock reading is stored as both the overall start time and the
    prompt start time.
    """
    now = time.perf_counter()
    ctx.start_time = now
    ctx.prompt_start_time = now
96
+
97
def profiling_prompt_start(ctx: ProfilingContext) -> None:
    """Stamp the beginning of prompt processing (ml_profiling_prompt_start)."""
    now = time.perf_counter()
    ctx.prompt_start_time = now
100
+
101
def profiling_prompt_end(ctx: ProfilingContext) -> None:
    """Stamp the end of prompt processing (ml_profiling_prompt_end)."""
    now = time.perf_counter()
    ctx.prompt_end_time = now
104
+
105
def profiling_decode_start(ctx: ProfilingContext) -> None:
    """Stamp the beginning of the decode phase (ml_profiling_decode_start)."""
    now = time.perf_counter()
    ctx.decode_start_time = now
108
+
109
def profiling_decode_end(ctx: ProfilingContext) -> None:
    """Stamp the end of the decode phase (ml_profiling_decode_end)."""
    now = time.perf_counter()
    ctx.decode_end_time = now
112
+
113
def profiling_record_ttft(ctx: ProfilingContext) -> None:
    """Capture the first-token timestamp once (ml_profiling_record_ttft).

    Does nothing when a first-token time was already recorded or when
    profiling has not been started (``ctx.start_time`` is ``None``).
    """
    if ctx.ttft_recorded or ctx.start_time is None:
        return
    ctx.first_token_time = time.perf_counter()
    ctx.ttft_recorded = True
118
+
119
def profiling_update_prompt_tokens(ctx: ProfilingContext, prompt_tokens: int) -> None:
    """Overwrite the prompt token count on ``ctx`` (ml_profiling_update_prompt_tokens)."""
    setattr(ctx, "prompt_tokens", prompt_tokens)
122
+
123
def profiling_update_generated_tokens(ctx: ProfilingContext, generated_tokens: int) -> None:
    """Overwrite the generated token count on ``ctx`` (ml_profiling_update_generated_tokens)."""
    setattr(ctx, "generated_tokens", generated_tokens)
126
+
127
def profiling_stop_reason(ctx: ProfilingContext, stop_reason: int) -> None:
    """Store the numeric stop reason on ``ctx`` (ml_profiling_stop_reason)."""
    setattr(ctx, "stop_reason", stop_reason)
130
+
131
def profiling_end(ctx: ProfilingContext) -> None:
    """Stamp the end of the profiled operation (ml_profiling_end)."""
    now = time.perf_counter()
    ctx.end_time = now
134
+
135
def profiling_gen_data(ctx: ProfilingContext) -> ProfilingData:
    """Derive a ``ProfilingData`` from the timestamps in ``ctx`` (ml_profiling_gen_data).

    Returns an all-default ``ProfilingData`` when the context lacks a start
    or end timestamp. Otherwise durations are converted from seconds to
    whole microseconds; any phase whose timestamps are incomplete stays 0.
    """

    def _span_us(begin, finish):
        # Microseconds between two clock readings; 0 if either is missing.
        if begin is None or finish is None:
            return 0
        return int((finish - begin) * 1_000_000)

    data = ProfilingData()
    if ctx.start_time is None or ctx.end_time is None:
        return data

    # Phase durations.
    data.total_time_us = _span_us(ctx.start_time, ctx.end_time)
    data.prompt_time_us = _span_us(ctx.prompt_start_time, ctx.prompt_end_time)
    data.decode_time_us = _span_us(ctx.decode_start_time, ctx.decode_end_time)
    # Time to first token is measured from the overall start.
    data.ttft_us = _span_us(ctx.start_time, ctx.first_token_time)

    # Token counters.
    data.prompt_tokens = ctx.prompt_tokens
    data.generated_tokens = ctx.generated_tokens
    data.total_tokens = ctx.prompt_tokens + ctx.generated_tokens

    # Decoding speed is only defined for a positive decode duration.
    if data.decode_time_us > 0:
        data.tokens_per_second = (data.generated_tokens * 1_000_000.0) / data.decode_time_us

    data.stop_reason = ctx.stop_reason
    return data
170
+
171
def stop_reason_to_string(reason: int) -> str:
    """Return the ``StopReason`` member name for ``reason``.

    Values that are not valid ``StopReason`` members yield ``"UNKNOWN(<reason>)"``.
    """
    try:
        member = StopReason(reason)
    except ValueError:
        return f"UNKNOWN({reason})"
    return member.name
177
+
178
+ # --------------------------------------------------------------------------------------
179
+ # Profiling mixin for model classes
180
+ # --------------------------------------------------------------------------------------
181
+
182
class ProfilingMixin:
    """Mixin that adds per-operation profiling helpers to a model class.

    It owns a ``ProfilingContext`` (raw timestamps for the operation in
    progress) and a ``ProfilingData`` (metrics of the last finished
    operation). Each method forwards to the matching module-level
    ``profiling_*`` function.

    NOTE(review): ``__init__`` takes no arguments and does not call
    ``super().__init__()``; the attributes exist only after it has run.
    Confirm that every class using the mixin invokes it.
    """

    def __init__(self):
        """Create an empty profiling context and an empty result record."""
        self._profiling_context = ProfilingContext()
        self._profiling_data = ProfilingData()

    def _start_profiling(self) -> None:
        """Clear the context, then mark the start of a new operation."""
        ctx = self._profiling_context
        profiling_reset(ctx)
        profiling_start(ctx)

    def _prompt_start(self) -> None:
        """Mark the beginning of prompt processing."""
        profiling_prompt_start(self._profiling_context)

    def _prompt_end(self) -> None:
        """Mark the end of prompt processing."""
        profiling_prompt_end(self._profiling_context)

    def _decode_start(self) -> None:
        """Mark the beginning of decoding."""
        profiling_decode_start(self._profiling_context)

    def _decode_end(self) -> None:
        """Mark the end of decoding."""
        profiling_decode_end(self._profiling_context)

    def _record_ttft(self) -> None:
        """Record the first-token timestamp (only the first call has effect)."""
        profiling_record_ttft(self._profiling_context)

    def _update_prompt_tokens(self, prompt_tokens: int) -> None:
        """Store the prompt token count in the context."""
        profiling_update_prompt_tokens(self._profiling_context, prompt_tokens)

    def _update_generated_tokens(self, generated_tokens: int) -> None:
        """Store the generated token count in the context."""
        profiling_update_generated_tokens(self._profiling_context, generated_tokens)

    def _set_stop_reason(self, stop_reason: int) -> None:
        """Store the numeric stop reason in the context."""
        profiling_stop_reason(self._profiling_context, stop_reason)

    def _end_profiling(self) -> ProfilingData:
        """Mark the end of the operation, compute its metrics and return them."""
        ctx = self._profiling_context
        profiling_end(ctx)
        self._profiling_data = profiling_gen_data(ctx)
        return self._profiling_data

    def get_profiling_data(self) -> ProfilingData:
        """Return the metrics stored by the last ``_end_profiling`` call."""
        return self._profiling_data

    def reset_profiling(self) -> None:
        """Zero the stored metrics; the profiling context is left untouched."""
        self._profiling_data.reset()
Binary file
Binary file
nexaai/common.py ADDED
@@ -0,0 +1,104 @@
1
+ from dataclasses import dataclass
2
+ from typing import TypedDict, Literal, Optional, List
3
+ from enum import Enum
4
+
5
+
6
class PluginID(str, Enum):
    """Identifiers of the available plugins.

    Members are also ``str`` instances, so they compare equal to their
    plain-string values.
    """

    MLX = "mlx"
    LLAMA_CPP = "llama_cpp"
    NEXAML = "nexaml"
11
+
12
+
13
class ChatMessage(TypedDict):
    """A single text-only chat message."""

    # Which participant the message belongs to.
    role: Literal["user", "assistant", "system"]
    # Message text.
    content: str
16
+
17
class MultiModalMessageContent(TypedDict):
    """One content item of a multi-modal message.

    NOTE(review): the TypedDict is total, so static checkers require all four
    keys to be present even when their value is ``None`` — confirm this is
    the intended contract.
    """

    # Kind of content carried by this item.
    type: Literal["text", "image", "audio", "video"]
    text: Optional[str]
    url: Optional[str]
    path: Optional[str]
22
+
23
class MultiModalMessage(TypedDict):
    """A chat message whose content is a list of multi-modal items."""

    # Which participant the message belongs to.
    role: Literal["user", "assistant", "system"]
    # Content items of the message.
    content: List[MultiModalMessageContent]
26
+
27
+
28
@dataclass
class SamplerConfig:
    """Sampling parameters for text generation.

    Every field has a default, so ``SamplerConfig()`` is a valid
    configuration. ``grammar_path`` and ``grammar_string`` default to
    ``None`` and are annotated as optional accordingly.
    """

    temperature: float = 0.8
    top_p: float = 0.95
    top_k: int = 40
    repetition_penalty: float = 1.0
    presence_penalty: float = 0.0
    frequency_penalty: float = 0.0
    seed: int = -1
    # Optional grammar inputs; None means "not provided".
    grammar_path: Optional[str] = None
    grammar_string: Optional[str] = None
39
+
40
@dataclass
class GenerationConfig:
    """Per-request generation settings.

    All fields except ``max_tokens`` default to ``None`` and are therefore
    annotated as optional.
    """

    max_tokens: int = 1024
    stop_words: Optional[list[str]] = None
    sampler_config: Optional[SamplerConfig] = None
    image_paths: Optional[list[str]] = None
    audio_paths: Optional[list[str]] = None
47
+
48
@dataclass
class ModelConfig:
    """Model loading configuration.

    Fields that default to ``None`` (thread counts and chat template
    overrides) are annotated as optional.
    """

    n_ctx: int = 4096
    n_threads: Optional[int] = None
    n_threads_batch: Optional[int] = None
    n_batch: int = 512
    n_ubatch: int = 512
    n_seq_max: int = 1
    n_gpu_layers: int = 999
    chat_template_path: Optional[str] = None
    chat_template_content: Optional[str] = None
59
+
60
+
61
@dataclass(frozen=True)  # instances are read-only
class ProfilingData:
    """Immutable record of LLM/VLM performance metrics."""

    ttft: int = 0                  # time to first token (us)
    prompt_time: int = 0           # prompt processing time (us)
    decode_time: int = 0           # token generation time (us)
    prompt_tokens: int = 0         # number of prompt tokens
    generated_tokens: int = 0      # number of generated tokens
    audio_duration: int = 0        # audio duration (us)
    prefill_speed: float = 0.0     # prefill speed (tokens/sec)
    decoding_speed: float = 0.0    # decoding speed (tokens/sec)
    real_time_factor: float = 0.0  # real-time factor (RTF)
    stop_reason: str = ""          # "eos", "length", "user", "stop_sequence"

    @classmethod
    def from_dict(cls, data: dict) -> "ProfilingData":
        """Build an instance from ``data``.

        Missing keys fall back to the field defaults; keys that are not
        fields are ignored.
        """
        fallbacks = (
            ("ttft", 0),
            ("prompt_time", 0),
            ("decode_time", 0),
            ("prompt_tokens", 0),
            ("generated_tokens", 0),
            ("audio_duration", 0),
            ("prefill_speed", 0.0),
            ("decoding_speed", 0.0),
            ("real_time_factor", 0.0),
            ("stop_reason", ""),
        )
        return cls(**{key: data.get(key, default) for key, default in fallbacks})

    def to_dict(self) -> dict:
        """Return the metrics as a plain dict keyed by field name."""
        keys = (
            "ttft",
            "prompt_time",
            "decode_time",
            "prompt_tokens",
            "generated_tokens",
            "audio_duration",
            "prefill_speed",
            "decoding_speed",
            "real_time_factor",
            "stop_reason",
        )
        return {key: getattr(self, key) for key in keys}
nexaai/cv.py ADDED
@@ -0,0 +1,92 @@
1
+ from typing import List, Optional, Union
2
+ from abc import abstractmethod
3
+ from dataclasses import dataclass
4
+
5
+ from nexaai.base import BaseModel
6
+ from nexaai.common import PluginID
7
+
8
+
9
@dataclass
class BoundingBox:
    """Generic bounding box.

    Whether ``x``/``y`` are normalized or pixel coordinates depends on the
    model that produced the box.
    """

    x: float       # X coordinate
    y: float       # Y coordinate
    width: float   # box width
    height: float  # box height
16
+
17
+
18
@dataclass
class CVResult:
    """One generic detection/classification result.

    Every field is optional or has a zero default; which ones are populated
    depends on the model (examples are given per field).
    """

    # Output images.
    image_paths: Optional[List[str]] = None  # output image paths
    image_count: int = 0                     # number of output images
    # Classification output (example: ConvNext).
    class_id: int = 0
    confidence: float = 0.0                  # confidence score [0.0-1.0]
    # Detection output (example: YOLO).
    bbox: Optional[BoundingBox] = None
    # Text output (example: OCR).
    text: Optional[str] = None
    # Feature embedding (example: CLIP embedding).
    embedding: Optional[List[float]] = None
    embedding_dim: int = 0                   # embedding dimension
29
+
30
+
31
@dataclass
class CVResults:
    """Container for the results of one CV inference call."""

    results: List[CVResult]  # the individual results
    result_count: int        # number of results; supplied by the caller, not derived
36
+
37
+
38
class CVCapabilities:
    """Integer constants naming CV capabilities.

    This is a plain class of int attributes, not an ``enum.Enum``.
    """

    OCR = 0
    CLASSIFICATION = 1
    SEGMENTATION = 2
    CUSTOM = 3  # custom task
44
+
45
+
46
@dataclass
class CVModelConfig:
    """CV model preprocessing configuration.

    ``capabilities`` is the only required field; every path is optional and
    defaults to ``None``.
    """

    capabilities: int  # a CVCapabilities value

    # --- MLX-OCR ---
    det_model_path: Optional[str] = None  # detection model path
    rec_model_path: Optional[str] = None  # recognition model path

    # --- QNN ---
    model_path: Optional[str] = None              # model path
    system_library_path: Optional[str] = None     # system library path
    backend_library_path: Optional[str] = None    # backend library path
    extension_library_path: Optional[str] = None  # extension library path
    config_file_path: Optional[str] = None        # config file path
    char_dict_path: Optional[str] = None          # character dictionary path
62
+
63
+
64
class CVModel(BaseModel):
    """Abstract base class for generic computer vision models."""

    def __init__(self):
        """Initialize the base CV model; nothing is set up at this level."""
        pass

    @classmethod
    def _load_from(cls,
                   _: str,  # TODO: remove this argument, this is a hack to make api design happy
                   config: CVModelConfig,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'CVModel':
        """Load a CV model, dispatching to the backend selected by ``plugin_id``.

        Args:
            _: Ignored placeholder argument.
            config: CV model configuration handed to the backend.
            plugin_id: ``PluginID`` member or its plain string value. The
                value ``"mlx"`` selects the MLX implementation; anything
                else selects the PyBind implementation.
            device_id: Optional device identifier handed to the backend.

        Returns:
            The model instance created by the selected backend.
        """
        # Normalize enum members to their string value before routing.
        backend = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

        # Backends are imported lazily so only the selected one is loaded.
        if backend == "mlx":
            from nexaai.cv_impl.mlx_cv_impl import MLXCVImpl
            return MLXCVImpl._load_from(config, plugin_id, device_id)

        from nexaai.cv_impl.pybind_cv_impl import PyBindCVImpl
        return PyBindCVImpl._load_from(config, plugin_id, device_id)

    @abstractmethod
    def infer(self, input_image_path: str) -> CVResults:
        """Run inference on the image at ``input_image_path``."""
        pass
File without changes
@@ -0,0 +1,89 @@
1
+ # Note: This code is generated by Cursor, not tested yet.
2
+
3
+ from typing import Optional, Union
4
+ import os
5
+
6
+ from nexaai.common import PluginID
7
+ from nexaai.cv import CVModel, CVModelConfig, CVResults
8
+ from nexaai.mlx_backend.cv.interface import CVModel as MLXCVInterface, create_cv_model
9
+
10
+
11
class MLXCVImpl(CVModel):
    """CV model implementation backed by the MLX backend."""

    def __init__(self):
        """Initialize MLX CV implementation with no model loaded."""
        super().__init__()
        self._mlx_cv = None  # set by _load_from, cleared by eject

    @classmethod
    def _load_from(cls,
                   config: CVModelConfig,
                   plugin_id: Union[PluginID, str] = PluginID.MLX,
                   device_id: Optional[str] = None
                   ) -> 'MLXCVImpl':
        """Load a CV model from ``config`` using the MLX backend.

        Args:
            config: CV model configuration; its fields are copied one-to-one
                into the MLX backend's own config class.
            plugin_id: Accepted for signature parity; not used here.
            device_id: Passed through to ``create_cv_model``.

        Returns:
            A new ``MLXCVImpl`` wrapping the created MLX model.

        Raises:
            RuntimeError: If anything fails while importing the backend
                config or creating the model; the original exception is
                attached as ``__cause__``.
        """
        try:
            # Imported lazily; an import failure is reported as RuntimeError too.
            from nexaai.mlx_backend.ml import CVModelConfig as MLXCVModelConfig

            # Convert our config to the MLX format.
            mlx_config = MLXCVModelConfig(
                capabilities=config.capabilities,
                det_model_path=config.det_model_path,
                rec_model_path=config.rec_model_path,
                model_path=config.model_path,
                system_library_path=config.system_library_path,
                backend_library_path=config.backend_library_path,
                extension_library_path=config.extension_library_path,
                config_file_path=config.config_file_path,
                char_dict_path=config.char_dict_path
            )

            instance = cls()
            instance._mlx_cv = create_cv_model(mlx_config, device_id)
            return instance
        except Exception as e:
            # Chain explicitly so the root cause stays visible in tracebacks.
            raise RuntimeError(f"Failed to load MLX CV: {str(e)}") from e

    def eject(self):
        """Destroy the model and free resources; a no-op when nothing is loaded."""
        if self._mlx_cv:
            self._mlx_cv.destroy()
            self._mlx_cv = None

    def infer(self, input_image_path: str) -> CVResults:
        """Run inference on an image and convert the result to ``CVResults``.

        Args:
            input_image_path: Path of the image handed to the MLX model.

        Returns:
            ``CVResults`` whose entries copy the fields of the MLX results.

        Raises:
            RuntimeError: If no model is loaded, or if inference or result
                conversion fails (original exception attached as
                ``__cause__``).
        """
        if not self._mlx_cv:
            raise RuntimeError("MLX CV not loaded")

        try:
            result = self._mlx_cv.infer(input_image_path)

            # Convert MLX results to our format.
            from nexaai.cv import CVResult

            our_results = [
                CVResult(
                    image_paths=mlx_result.image_paths,
                    image_count=mlx_result.image_count,
                    class_id=mlx_result.class_id,
                    confidence=mlx_result.confidence,
                    bbox=mlx_result.bbox,
                    text=mlx_result.text,
                    embedding=mlx_result.embedding,
                    embedding_dim=mlx_result.embedding_dim
                )
                for mlx_result in result.results
            ]

            return CVResults(
                results=our_results,
                result_count=result.result_count
            )
        except Exception as e:
            raise RuntimeError(f"Failed to perform CV inference: {str(e)}") from e
88
+
89
+
@@ -0,0 +1,32 @@
1
+ from typing import Optional, Union
2
+
3
+ from nexaai.common import PluginID
4
+ from nexaai.cv import CVModel, CVModelConfig, CVResults
5
+
6
+
7
class PyBindCVImpl(CVModel):
    """Placeholder CV implementation for the PyBind backend.

    Loading and cleanup are not implemented yet; ``infer`` always raises.
    """

    def __init__(self):
        """Initialize PyBind CV implementation."""
        super().__init__()
        # TODO: Add PyBind-specific initialization

    @classmethod
    def _load_from(cls,
                   config: CVModelConfig,
                   plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP,
                   device_id: Optional[str] = None
                   ) -> 'PyBindCVImpl':
        """Return a fresh instance; none of the arguments are used yet."""
        # TODO: Implement PyBind CV loading
        return cls()

    def eject(self):
        """Destroy the model and free resources (currently a no-op)."""
        # TODO: Implement PyBind CV cleanup
        pass

    def infer(self, input_image_path: str) -> CVResults:
        """Perform inference on image; always raises ``NotImplementedError`` for now."""
        # TODO: Implement PyBind CV inference
        raise NotImplementedError("PyBind CV inference not yet implemented")
nexaai/embedder.py ADDED
@@ -0,0 +1,72 @@
1
+ from typing import List, Union
2
+ from dataclasses import dataclass
3
+ from abc import abstractmethod
4
+ import numpy as np
5
+
6
+ from nexaai.base import BaseModel
7
+ from nexaai.common import PluginID
8
+
9
+
10
@dataclass
class EmbeddingConfig:
    """Options for embedding generation."""

    batch_size: int = 32
    normalize: bool = True
    normalize_method: str = "l2"
15
+
16
+
17
class Embedder(BaseModel):
    """Abstract base class for embedding models."""

    def __init__(self):
        """
        Internal initializer
        """
        pass

    @classmethod
    def _load_from(cls, model_path: str, tokenizer_file: str = "tokenizer.json", plugin_id: Union[PluginID, str] = PluginID.LLAMA_CPP) -> "Embedder":
        """
        Load an embedder from model files, routing to appropriate implementation.

        Args:
            model_path: Path to the model file
            tokenizer_file: Path to the tokenizer file (default: "tokenizer.json")
            plugin_id: Plugin ID to use for the model (default: PluginID.LLAMA_CPP).
                The value "mlx" selects the MLX implementation; anything else
                selects the PyBind implementation.

        Returns:
            Embedder instance
        """
        # Check plugin_id value for routing - handle both enum and string
        plugin_value = plugin_id.value if isinstance(plugin_id, PluginID) else plugin_id

        # Backends are imported lazily so only the selected one is loaded.
        if plugin_value == "mlx":
            from nexaai.embedder_impl.mlx_embedder_impl import MLXEmbedderImpl
            return MLXEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)
        else:
            from nexaai.embedder_impl.pybind_embedder_impl import PyBindEmbedderImpl
            return PyBindEmbedderImpl._load_from(model_path, tokenizer_file, plugin_id)

    # NOTE(review): the default EmbeddingConfig() is created once at class
    # definition time and shared by every call that relies on it;
    # implementations should treat it as read-only.
    @abstractmethod
    def generate(self, texts: Union[List[str], str, None] = None, config: EmbeddingConfig = EmbeddingConfig(), input_ids: Union[List[int], List[List[int]], None] = None) -> np.ndarray:
        """
        Generate embeddings for the given texts or input_ids.

        Args:
            texts: List of strings or single string to embed
            config: Configuration for embedding generation
            input_ids: Pre-tokenized input as:
                - Single sequence: list of integers [1, 2, 3, 4]
                - Multiple sequences: list of lists [[1, 2, 3], [4, 5, 6]]

        Returns:
            numpy array of embeddings with shape (num_sequences, embedding_dim)
        """
        pass

    @abstractmethod
    def get_embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model

        Returns:
            The embedding dimension in int
        """
        pass
File without changes