nexaai 1.0.19rc6-cp310-cp310-macosx_14_0_universal2.whl → 1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of nexaai has been flagged as potentially problematic.

Files changed (224):
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  5. nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
  6. nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
  7. nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
  8. nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
  9. nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
  10. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  11. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  12. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +12 -0
  13. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +122 -0
  14. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  15. nexaai/binds/nexa_mlx/py-lib/common/utils.py +25 -0
  16. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  17. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +195 -0
  18. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +151 -0
  19. nexaai/binds/nexa_mlx/py-lib/cv/main.py +81 -0
  20. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +1736 -0
  21. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  22. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +333 -0
  23. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +617 -0
  24. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +173 -0
  25. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  26. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +399 -0
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +1 -0
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +244 -0
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +82 -0
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +281 -0
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +306 -0
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +116 -0
  33. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +65 -0
  34. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +386 -0
  35. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +105 -0
  36. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +100 -0
  37. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +460 -0
  38. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +274 -0
  39. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  40. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +149 -0
  41. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +764 -0
  42. nexaai/binds/nexa_mlx/py-lib/llm/main.py +68 -0
  43. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  44. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +174 -0
  45. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +287 -0
  46. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +127 -0
  47. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  48. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +330 -0
  49. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +1 -0
  50. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +362 -0
  51. nexaai/binds/nexa_mlx/py-lib/sd/main.py +286 -0
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +306 -0
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +116 -0
  54. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +65 -0
  55. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +385 -0
  56. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +105 -0
  57. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +100 -0
  58. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +460 -0
  59. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +274 -0
  60. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +12 -0
  61. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +276 -0
  62. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +3 -0
  63. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +572 -0
  64. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +294 -0
  65. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +276 -0
  66. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +504 -0
  67. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +320 -0
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +68 -0
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +8 -0
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +186 -0
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +233 -0
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +503 -0
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +202 -0
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +230 -0
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +8 -0
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +366 -0
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +488 -0
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +591 -0
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +8 -0
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +213 -0
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +315 -0
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +238 -0
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +2 -0
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +1038 -0
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +139 -0
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +629 -0
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +1022 -0
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +9 -0
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +294 -0
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +191 -0
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +267 -0
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +8 -0
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +175 -0
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +192 -0
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +233 -0
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +9 -0
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +220 -0
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +393 -0
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +293 -0
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +307 -0
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +8 -0
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +509 -0
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +522 -0
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +8 -0
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +386 -0
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +138 -0
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +560 -0
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +8 -0
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +240 -0
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +153 -0
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +259 -0
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +9 -0
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +236 -0
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +303 -0
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +8 -0
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +230 -0
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +160 -0
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +243 -0
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +8 -0
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +283 -0
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +8 -0
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +416 -0
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +172 -0
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +499 -0
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +8 -0
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +243 -0
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +133 -0
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +465 -0
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +10 -0
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +230 -0
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +557 -0
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +526 -0
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +8 -0
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +282 -0
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +160 -0
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +242 -0
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +8 -0
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +21 -0
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +71 -0
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +324 -0
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +8 -0
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +229 -0
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +161 -0
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +320 -0
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +104 -0
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +490 -0
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +312 -0
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +476 -0
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1309 -0
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +8 -0
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +209 -0
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +215 -0
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +474 -0
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +39 -0
  195. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +344 -0
  196. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +9 -0
  197. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +70 -0
  198. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +296 -0
  199. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +160 -0
  200. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +928 -0
  201. nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
  202. nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
  203. nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
  204. nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
  205. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +276 -0
  206. nexaai/mlx_backend/vlm/interface.py +21 -4
  207. nexaai/mlx_backend/vlm/main.py +6 -2
  208. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  209. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
  210. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
  211. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
  212. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
  213. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
  214. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
  215. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
  216. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1309 -0
  217. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
  218. nexaai/utils/manifest_utils.py +222 -15
  219. nexaai/utils/model_manager.py +83 -7
  220. nexaai/utils/model_types.py +2 -0
  221. {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc7.dist-info}/METADATA +1 -1
  222. {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc7.dist-info}/RECORD +224 -24
  223. {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc7.dist-info}/WHEEL +0 -0
  224. {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc7.dist-info}/top_level.txt +0 -0
The hunk below adds a new 274-line file. By line count it appears to match the two identical vae.py additions listed above (nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py and nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py, each +274); the copyright header indicates the code comes from Apple's MLX Stable Diffusion example.

@@ -0,0 +1,274 @@
+ # Copyright © 2023 Apple Inc.
+
+ import math
+ from typing import List
+
+ import mlx.core as mx
+ import mlx.nn as nn
+
+ from .config import AutoencoderConfig
+ from .unet import ResnetBlock2D, upsample_nearest
+
+
+ class Attention(nn.Module):
+     """A single head unmasked attention for use with the VAE."""
+
+     def __init__(self, dims: int, norm_groups: int = 32):
+         super().__init__()
+
+         self.group_norm = nn.GroupNorm(norm_groups, dims, pytorch_compatible=True)
+         self.query_proj = nn.Linear(dims, dims)
+         self.key_proj = nn.Linear(dims, dims)
+         self.value_proj = nn.Linear(dims, dims)
+         self.out_proj = nn.Linear(dims, dims)
+
+     def __call__(self, x):
+         B, H, W, C = x.shape
+
+         y = self.group_norm(x)
+
+         queries = self.query_proj(y).reshape(B, H * W, C)
+         keys = self.key_proj(y).reshape(B, H * W, C)
+         values = self.value_proj(y).reshape(B, H * W, C)
+
+         scale = 1 / math.sqrt(queries.shape[-1])
+         scores = (queries * scale) @ keys.transpose(0, 2, 1)
+         attn = mx.softmax(scores, axis=-1)
+         y = (attn @ values).reshape(B, H, W, C)
+
+         y = self.out_proj(y)
+         x = x + y
+
+         return x
+
+
+ class EncoderDecoderBlock2D(nn.Module):
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         num_layers: int = 1,
+         resnet_groups: int = 32,
+         add_downsample=True,
+         add_upsample=True,
+     ):
+         super().__init__()
+
+         # Add the resnet blocks
+         self.resnets = [
+             ResnetBlock2D(
+                 in_channels=in_channels if i == 0 else out_channels,
+                 out_channels=out_channels,
+                 groups=resnet_groups,
+             )
+             for i in range(num_layers)
+         ]
+
+         # Add an optional downsampling layer
+         if add_downsample:
+             self.downsample = nn.Conv2d(
+                 out_channels, out_channels, kernel_size=3, stride=2, padding=0
+             )
+
+         # or upsampling layer
+         if add_upsample:
+             self.upsample = nn.Conv2d(
+                 out_channels, out_channels, kernel_size=3, stride=1, padding=1
+             )
+
+     def __call__(self, x):
+         for resnet in self.resnets:
+             x = resnet(x)
+
+         if "downsample" in self:
+             x = mx.pad(x, [(0, 0), (0, 1), (0, 1), (0, 0)])
+             x = self.downsample(x)
+
+         if "upsample" in self:
+             x = self.upsample(upsample_nearest(x))
+
+         return x
+
+
+ class Encoder(nn.Module):
+     """Implements the encoder side of the Autoencoder."""
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         block_out_channels: List[int] = [64],
+         layers_per_block: int = 2,
+         resnet_groups: int = 32,
+     ):
+         super().__init__()
+
+         self.conv_in = nn.Conv2d(
+             in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1
+         )
+
+         channels = [block_out_channels[0]] + list(block_out_channels)
+         self.down_blocks = [
+             EncoderDecoderBlock2D(
+                 in_channels,
+                 out_channels,
+                 num_layers=layers_per_block,
+                 resnet_groups=resnet_groups,
+                 add_downsample=i < len(block_out_channels) - 1,
+                 add_upsample=False,
+             )
+             for i, (in_channels, out_channels) in enumerate(zip(channels, channels[1:]))
+         ]
+
+         self.mid_blocks = [
+             ResnetBlock2D(
+                 in_channels=block_out_channels[-1],
+                 out_channels=block_out_channels[-1],
+                 groups=resnet_groups,
+             ),
+             Attention(block_out_channels[-1], resnet_groups),
+             ResnetBlock2D(
+                 in_channels=block_out_channels[-1],
+                 out_channels=block_out_channels[-1],
+                 groups=resnet_groups,
+             ),
+         ]
+
+         self.conv_norm_out = nn.GroupNorm(
+             resnet_groups, block_out_channels[-1], pytorch_compatible=True
+         )
+         self.conv_out = nn.Conv2d(block_out_channels[-1], out_channels, 3, padding=1)
+
+     def __call__(self, x):
+         x = self.conv_in(x)
+
+         for l in self.down_blocks:
+             x = l(x)
+
+         x = self.mid_blocks[0](x)
+         x = self.mid_blocks[1](x)
+         x = self.mid_blocks[2](x)
+
+         x = self.conv_norm_out(x)
+         x = nn.silu(x)
+         x = self.conv_out(x)
+
+         return x
+
+
+ class Decoder(nn.Module):
+     """Implements the decoder side of the Autoencoder."""
+
+     def __init__(
+         self,
+         in_channels: int,
+         out_channels: int,
+         block_out_channels: List[int] = [64],
+         layers_per_block: int = 2,
+         resnet_groups: int = 32,
+     ):
+         super().__init__()
+
+         self.conv_in = nn.Conv2d(
+             in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1
+         )
+
+         self.mid_blocks = [
+             ResnetBlock2D(
+                 in_channels=block_out_channels[-1],
+                 out_channels=block_out_channels[-1],
+                 groups=resnet_groups,
+             ),
+             Attention(block_out_channels[-1], resnet_groups),
+             ResnetBlock2D(
+                 in_channels=block_out_channels[-1],
+                 out_channels=block_out_channels[-1],
+                 groups=resnet_groups,
+             ),
+         ]
+
+         channels = list(reversed(block_out_channels))
+         channels = [channels[0]] + channels
+         self.up_blocks = [
+             EncoderDecoderBlock2D(
+                 in_channels,
+                 out_channels,
+                 num_layers=layers_per_block,
+                 resnet_groups=resnet_groups,
+                 add_downsample=False,
+                 add_upsample=i < len(block_out_channels) - 1,
+             )
+             for i, (in_channels, out_channels) in enumerate(zip(channels, channels[1:]))
+         ]
+
+         self.conv_norm_out = nn.GroupNorm(
+             resnet_groups, block_out_channels[0], pytorch_compatible=True
+         )
+         self.conv_out = nn.Conv2d(block_out_channels[0], out_channels, 3, padding=1)
+
+     def __call__(self, x):
+         x = self.conv_in(x)
+
+         x = self.mid_blocks[0](x)
+         x = self.mid_blocks[1](x)
+         x = self.mid_blocks[2](x)
+
+         for l in self.up_blocks:
+             x = l(x)
+
+         x = self.conv_norm_out(x)
+         x = nn.silu(x)
+         x = self.conv_out(x)
+
+         return x
+
+
+ class Autoencoder(nn.Module):
+     """The autoencoder that allows us to perform diffusion in the latent space."""
+
+     def __init__(self, config: AutoencoderConfig):
+         super().__init__()
+
+         self.latent_channels = config.latent_channels_in
+         self.scaling_factor = config.scaling_factor
+         self.encoder = Encoder(
+             config.in_channels,
+             config.latent_channels_out,
+             config.block_out_channels,
+             config.layers_per_block,
+             resnet_groups=config.norm_num_groups,
+         )
+         self.decoder = Decoder(
+             config.latent_channels_in,
+             config.out_channels,
+             config.block_out_channels,
+             config.layers_per_block + 1,
+             resnet_groups=config.norm_num_groups,
+         )
+
+         self.quant_proj = nn.Linear(
+             config.latent_channels_out, config.latent_channels_out
+         )
+         self.post_quant_proj = nn.Linear(
+             config.latent_channels_in, config.latent_channels_in
+         )
+
+     def decode(self, z):
+         z = z / self.scaling_factor
+         return self.decoder(self.post_quant_proj(z))
+
+     def encode(self, x):
+         x = self.encoder(x)
+         x = self.quant_proj(x)
+         mean, logvar = x.split(2, axis=-1)
+         mean = mean * self.scaling_factor
+         logvar = logvar + 2 * math.log(self.scaling_factor)
+
+         return mean, logvar
+
+     def __call__(self, x, key=None):
+         mean, logvar = self.encode(x)
+         z = mx.random.normal(mean.shape, key=key) * mx.exp(0.5 * logvar) + mean
+         x_hat = self.decode(z)
+
+         return dict(x_hat=x_hat, z=z, mean=mean, logvar=logvar)
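For orientation, here is a minimal, hypothetical round trip through the Autoencoder defined above. The AutoencoderConfig field names are taken from the code itself, but the concrete values below are assumed Stable Diffusion VAE defaults rather than values read from the wheel, and the import paths are assumptions (they depend on how py-lib is placed on sys.path). The package itself builds the model from pretrained weights via model_io.py; this sketch only exercises shapes with random weights.

# Hypothetical usage sketch: shape-checks the VAE with randomly initialized weights.
import mlx.core as mx

from sd.modeling.config import AutoencoderConfig  # assumed import path
from sd.modeling.vae import Autoencoder           # assumed import path

vae = Autoencoder(
    AutoencoderConfig(
        in_channels=3,
        out_channels=3,
        latent_channels_out=8,   # encoder emits mean and logvar, stacked
        latent_channels_in=4,
        block_out_channels=[128, 256, 512, 512],
        layers_per_block=2,
        norm_num_groups=32,
        scaling_factor=0.18215,  # assumed SD default
    )
)

x = mx.random.uniform(shape=(1, 512, 512, 3))  # MLX convolutions use NHWC
mean, logvar = vae.encode(x)   # three downsamples: (1, 64, 64, 4) each
x_hat = vae.decode(mean)       # back up to (1, 512, 512, 3)
print(mean.shape, logvar.shape, x_hat.shape)

With four entries in block_out_channels, the encoder applies three stride-2 downsamples (512 → 64 spatially), which is why the latent is 1/8 the input resolution.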
The hunk below adds a new 149-line file. By line count it matches nexaai/binds/nexa_mlx/py-lib/llm/generate.py (+149), the only 149-line addition in the list; the deprecation notice at the bottom indicates the code is derived from mlx_lm's chat CLI.

@@ -0,0 +1,149 @@
+ import argparse
+ from mlx_lm.models.cache import make_prompt_cache
+ import mlx.core as mx
+ import mlx.nn as nn
+ from mlx.utils import tree_reduce
+ from transformers import PreTrainedTokenizer
+ from mlx_lm.models import cache
+ from mlx_lm.models.cache import (
+     QuantizedKVCache,
+     load_prompt_cache,
+ )
+ from mlx_lm.sample_utils import make_sampler
+ from mlx_lm.tokenizer_utils import TokenizerWrapper
+ from mlx_lm.utils import does_model_support_input_embeddings, load
+ from mlx_lm.generate import stream_generate
+
+ DEFAULT_TEMP = 0.0
+ DEFAULT_TOP_P = 1.0
+ DEFAULT_XTC_PROBABILITY = 0.0
+ DEFAULT_XTC_THRESHOLD = 0.0
+ DEFAULT_SEED = None
+ DEFAULT_MAX_TOKENS = 256
+ DEFAULT_MODEL = "mlx-community/Qwen3-1.7B-4bit-DWQ"
+
+
+ def str2bool(string):
+     return string.lower() not in ["false", "f"]
+
+
+ def setup_arg_parser():
+     """Set up and return the argument parser."""
+     parser = argparse.ArgumentParser(description="Chat with an LLM")
+     parser.add_argument(
+         "--model",
+         type=str,
+         help="The path to the local model directory or Hugging Face repo.",
+         default=DEFAULT_MODEL,
+     )
+     parser.add_argument(
+         "--adapter-path",
+         type=str,
+         help="Optional path for the trained adapter weights and config.",
+     )
+     parser.add_argument(
+         "--temp", type=float, default=DEFAULT_TEMP, help="Sampling temperature"
+     )
+     parser.add_argument(
+         "--top-p", type=float, default=DEFAULT_TOP_P, help="Sampling top-p"
+     )
+     parser.add_argument(
+         "--xtc-probability",
+         type=float,
+         default=DEFAULT_XTC_PROBABILITY,
+         help="Probability of XTC sampling to happen each next token",
+     )
+     parser.add_argument(
+         "--xtc-threshold",
+         type=float,
+         default=DEFAULT_XTC_THRESHOLD,
+         help="Threshold the probs of each next token candidate to be sampled by XTC",
+     )
+     parser.add_argument(
+         "--seed",
+         type=int,
+         default=DEFAULT_SEED,
+         help="PRNG seed",
+     )
+     parser.add_argument(
+         "--max-kv-size",
+         type=int,
+         help="Set the maximum key-value cache size",
+         default=None,
+     )
+     parser.add_argument(
+         "--max-tokens",
+         "-m",
+         type=int,
+         default=DEFAULT_MAX_TOKENS,
+         help="Maximum number of tokens to generate",
+     )
+     return parser
+
+
+ def main():
+     parser = setup_arg_parser()
+     args = parser.parse_args()
+
+     # Seed the PRNG when a seed is given, so runs are reproducible
+     if args.seed is not None:
+         mx.random.seed(args.seed)
+
+     model, tokenizer = load(
+         args.model,
+         adapter_path=args.adapter_path,
+         tokenizer_config={"trust_remote_code": True},
+     )
+
+     # Initialize chat history
+     chat = []
+
+     while True:
+         try:
+             user_input = input("User: ").strip()
+
+             # Exit conditions
+             if user_input.lower() in ["exit", "quit", ""]:
+                 break
+
+             chat.append({"role": "user", "content": user_input})
+
+             formatted_prompt = tokenizer.apply_chat_template(
+                 chat, add_generation_prompt=True
+             )
+
+             # Generate response
+             response = ""
+             print("Assistant: ", end="", flush=True)
+
+             for chunk in stream_generate(
+                 model,
+                 tokenizer,
+                 formatted_prompt,
+                 max_tokens=args.max_tokens,
+                 sampler=make_sampler(
+                     args.temp,
+                     args.top_p,
+                     xtc_threshold=args.xtc_threshold,
+                     xtc_probability=args.xtc_probability,
+                     xtc_special_tokens=(
+                         tokenizer.encode("\n") + list(tokenizer.eos_token_ids)
+                     ),
+                 ),
+             ):
+                 response += chunk.text
+                 print(chunk.text, end="", flush=True)
+
+             print()  # New line after response
+
+             # Add assistant response to chat history
+             chat.append({"role": "assistant", "content": response})
+
+         except KeyboardInterrupt:
+             print("\nConversation interrupted by user.")
+             break
+         except Exception as e:
+             print(f"Error: {e}")
+             continue
+
+
+ if __name__ == "__main__":
+     print(
+         "Calling `python -m mlx_lm.chat...` directly is deprecated."
+         " Use `mlx_lm.chat...` or `python -m mlx_lm chat ...` instead."
+     )
+     main()
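The script above is an interactive REPL (it also accepts --model, --temp, --top-p, --xtc-probability, --xtc-threshold, --seed, --max-kv-size, and --max-tokens, per setup_arg_parser), but the same generation path can be driven programmatically. A minimal non-interactive sketch, assuming mlx-lm is installed and the script's default model can be fetched from Hugging Face; it only reuses calls already imported by the file above:

# Minimal sketch of the script's generation path, outside the REPL loop.
from mlx_lm.generate import stream_generate
from mlx_lm.sample_utils import make_sampler
from mlx_lm.utils import load

model, tokenizer = load("mlx-community/Qwen3-1.7B-4bit-DWQ")  # DEFAULT_MODEL above

chat = [{"role": "user", "content": "In one sentence, what is MLX?"}]
prompt = tokenizer.apply_chat_template(chat, add_generation_prompt=True)

# temp=0.0, top_p=1.0 matches the script's defaults (greedy decoding).
for chunk in stream_generate(
    model, tokenizer, prompt, max_tokens=128, sampler=make_sampler(0.0, 1.0)
):
    print(chunk.text, end="", flush=True)
print()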