nexaai 1.0.19rc5-cp310-cp310-macosx_14_0_universal2.whl → 1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai has been flagged as possibly problematic.

Files changed (221):
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
  5. nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
  6. nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
  7. nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
  8. nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
  9. nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
  10. nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
  11. nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
  12. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +12 -0
  13. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +122 -0
  14. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  15. nexaai/binds/nexa_mlx/py-lib/common/utils.py +25 -0
  16. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  17. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +195 -0
  18. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +151 -0
  19. nexaai/binds/nexa_mlx/py-lib/cv/main.py +81 -0
  20. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +1736 -0
  21. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  22. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +333 -0
  23. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +617 -0
  24. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +173 -0
  25. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  26. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +399 -0
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +1 -0
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +244 -0
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +82 -0
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +281 -0
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +306 -0
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +116 -0
  33. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +65 -0
  34. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +386 -0
  35. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +105 -0
  36. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +100 -0
  37. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +460 -0
  38. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +274 -0
  39. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  40. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +149 -0
  41. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +764 -0
  42. nexaai/binds/nexa_mlx/py-lib/llm/main.py +68 -0
  43. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  44. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +174 -0
  45. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +287 -0
  46. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +127 -0
  47. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  48. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +330 -0
  49. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +1 -0
  50. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +362 -0
  51. nexaai/binds/nexa_mlx/py-lib/sd/main.py +286 -0
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +306 -0
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +116 -0
  54. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +65 -0
  55. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +385 -0
  56. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +105 -0
  57. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +100 -0
  58. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +460 -0
  59. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +274 -0
  60. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +12 -0
  61. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +276 -0
  62. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +3 -0
  63. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +572 -0
  64. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +294 -0
  65. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +276 -0
  66. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +504 -0
  67. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +320 -0
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +68 -0
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +8 -0
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +193 -0
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +186 -0
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +233 -0
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +503 -0
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +202 -0
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +230 -0
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +10 -0
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +264 -0
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +472 -0
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +591 -0
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +526 -0
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +356 -0
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +8 -0
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +366 -0
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +488 -0
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +591 -0
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +8 -0
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +213 -0
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +315 -0
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +238 -0
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +2 -0
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +1038 -0
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +139 -0
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +322 -0
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +629 -0
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +1022 -0
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +9 -0
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +294 -0
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +191 -0
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +267 -0
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +8 -0
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +175 -0
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +192 -0
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +233 -0
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +9 -0
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +140 -0
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +220 -0
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +393 -0
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +293 -0
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +307 -0
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +8 -0
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +143 -0
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +509 -0
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +522 -0
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +8 -0
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +386 -0
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +138 -0
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +560 -0
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +8 -0
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +240 -0
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +153 -0
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +259 -0
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +9 -0
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +236 -0
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +256 -0
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +303 -0
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +8 -0
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +230 -0
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +160 -0
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +243 -0
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +8 -0
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +283 -0
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +8 -0
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +416 -0
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +172 -0
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +499 -0
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +8 -0
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +243 -0
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +133 -0
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +465 -0
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +10 -0
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +230 -0
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +385 -0
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +557 -0
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +526 -0
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +8 -0
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +282 -0
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +160 -0
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +242 -0
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +8 -0
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +21 -0
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +243 -0
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +71 -0
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +324 -0
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +8 -0
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +229 -0
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +161 -0
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +320 -0
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +2 -0
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +108 -0
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +490 -0
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +168 -0
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +414 -0
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +2 -0
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +104 -0
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +490 -0
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +167 -0
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +312 -0
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +117 -0
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +531 -0
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +701 -0
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +255 -0
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +303 -0
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +407 -0
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +476 -0
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +1223 -0
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1309 -0
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +8 -0
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +62 -0
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +209 -0
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +215 -0
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +474 -0
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +39 -0
  195. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +344 -0
  196. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +9 -0
  197. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +70 -0
  198. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +296 -0
  199. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +160 -0
  200. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +928 -0
  201. nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
  202. nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
  203. nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
  204. nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
  205. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +276 -0
  206. nexaai/mlx_backend/vlm/interface.py +21 -4
  207. nexaai/mlx_backend/vlm/main.py +6 -2
  208. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  209. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
  210. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
  211. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
  212. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
  213. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
  214. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
  215. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
  216. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1309 -0
  217. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
  218. {nexaai-1.0.19rc5.dist-info → nexaai-1.0.19rc7.dist-info}/METADATA +1 -1
  219. {nexaai-1.0.19rc5.dist-info → nexaai-1.0.19rc7.dist-info}/RECORD +221 -21
  220. {nexaai-1.0.19rc5.dist-info → nexaai-1.0.19rc7.dist-info}/WHEEL +0 -0
  221. {nexaai-1.0.19rc5.dist-info → nexaai-1.0.19rc7.dist-info}/top_level.txt +0 -0
nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py
@@ -0,0 +1,243 @@
+ import inspect
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import mlx.core as mx
+ import mlx.nn as nn
+
+ from ..base import (
+     LanguageModelOutput,
+     create_attention_mask,
+     scaled_dot_product_attention,
+ )
+ from ..cache import KVCache
+
+
+ @dataclass
+ class TextConfig:
+     model_type: str = "molmo"
+     max_position_embeddings: int = 4096
+     d_model: int = 3584
+     n_heads: int = 28
+     n_kv_heads: int = 4
+     n_layers: int = 28
+     mlp_ratio: int = 4
+     max_sequence_length: int = 1024
+     act_output_multiplier: float = 0.5
+     mlp_hidden_size: int = 37888
+     vocab_size: int = 152064
+     embedding_size: Optional[int] = 152064
+     additional_vocab_size: Optional[int] = None
+     attention_dropout: float = 0.1
+     residual_dropout: float = 0.1
+     embedding_dropout: float = 0.1
+     layer_norm_eps: float = 1e-5
+     initializer_range: float = 0.02
+     pad_token_id: int = -1
+     rope: bool = True
+     rope_theta: float = 1000000.0
+     weight_tying: bool = False
+     rope_full_precision: bool = True
+     rope_impl: str = "interleave"
+     additional_vocab_size: Optional[int] = 128
+
+     @classmethod
+     def from_dict(cls, params):
+         return cls(
+             **{
+                 k: v
+                 for k, v in params.items()
+                 if k in inspect.signature(cls).parameters
+             }
+         )
+
+
+ class SwiGLU(nn.Module):
+     def __call__(self, x: mx.array) -> mx.array:
+         x, gate = mx.split(x, 2, axis=-1)
+         return nn.silu(gate) * x
+
+
+ class MolmoBlock(nn.Module):
+     def __init__(self, config: TextConfig):
+         super().__init__()
+         self.attn_out = nn.Linear(config.d_model, config.d_model, bias=False)
+         self.ff_out = nn.Linear(
+             int(config.act_output_multiplier * config.mlp_hidden_size),
+             config.d_model,
+             bias=False,
+         )
+         self.attn_norm = nn.RMSNorm(config.d_model, eps=config.layer_norm_eps)
+         self.ff_norm = nn.RMSNorm(config.d_model, eps=config.layer_norm_eps)
+         self.ff_proj = nn.Linear(config.d_model, config.mlp_hidden_size, bias=False)
+         head_dim = config.d_model // config.n_heads
+         self.rotary_emb = nn.RoPE(head_dim, base=config.rope_theta)
+         self.scale = head_dim**-0.5
+         self.n_heads = config.n_heads
+         self.n_kv_heads = config.n_kv_heads
+         self.fused_dims = (
+             config.d_model,
+             config.n_kv_heads * head_dim,
+             config.n_kv_heads * head_dim,
+         )
+         self.att_proj = nn.Linear(config.d_model, sum(self.fused_dims), bias=True)
+         self.act = SwiGLU()
+
+     def __call__(self, x, mask=None, cache=None):
+         batch_size, seq_len, D = x.shape
+         attn_in = self.attn_norm(x)
+
+         qkv = self.att_proj(attn_in)
+
+         q, k, v = mx.split(
+             qkv, [self.fused_dims[0], self.fused_dims[0] + self.fused_dims[1]], axis=-1
+         )
+
+         q = q.reshape(batch_size, seq_len, self.n_heads, D // self.n_heads).transpose(
+             0, 2, 1, 3
+         )
+         k = k.reshape(
+             batch_size, seq_len, self.n_kv_heads, D // self.n_heads
+         ).transpose(0, 2, 1, 3)
+         v = v.reshape(
+             batch_size, seq_len, self.n_kv_heads, D // self.n_heads
+         ).transpose(0, 2, 1, 3)
+
+         if cache is not None:
+             q = self.rotary_emb(q, offset=cache.offset)
+             k = self.rotary_emb(k, offset=cache.offset)
+             k, v = cache.update_and_fetch(k, v)
+         else:
+             q = self.rotary_emb(q)
+             k = self.rotary_emb(k)
+
+         # Perform attention
+         att = scaled_dot_product_attention(q, k, v, cache, scale=self.scale, mask=mask)
+         att = att.transpose(0, 2, 1, 3).reshape(batch_size, seq_len, D)
+         att = self.attn_out(att)
+
+         # Add attention scores
+         # shape: (batch_size, seq_len, d_model)
+         x = x + att
+
+         # Feed-forward layer
+         og_x = x
+         x = self.ff_norm(x)
+         x = self.ff_proj(x)
+         x = self.act(x)
+         x = self.ff_out(x)
+         x = og_x + x
+
+         return x
+
+
+ class Embedding(nn.Module):
+     def __init__(
+         self,
+         num_embeddings: int,
+         num_new_embeddings: int,
+         features: int,
+         initializer_range: float = 0.02,
+         new_embed_initializer_range: float = 0.02,
+     ):
+         super().__init__()
+         self.initializer_range = initializer_range
+         self.new_embed_initializer_range = new_embed_initializer_range
+
+         # Initialize embeddings
+         self.embedding = mx.random.normal(
+             (num_embeddings, features), scale=self.initializer_range
+         )
+         self.new_embedding = mx.random.normal(
+             (num_new_embeddings, features), scale=self.new_embed_initializer_range
+         )
+
+     def __call__(self, x: mx.array) -> mx.array:
+         return mx.concat([self.embedding, self.new_embedding], axis=0)[x]
+
+
+ class Molmo(nn.Module):
+     def __init__(self, config: TextConfig):
+         super().__init__()
+         self.config = config
+
+         self.wte = Embedding(
+             config.embedding_size, config.additional_vocab_size, config.d_model
+         )
+
+         self.blocks = [MolmoBlock(config) for _ in range(config.n_layers)]
+
+         self.ln_f = nn.RMSNorm(config.d_model, eps=config.layer_norm_eps)
+
+         if not config.weight_tying:
+             self.ff_out = nn.Linear(config.d_model, config.vocab_size, bias=False)
+
+     def __call__(
+         self,
+         input_ids: mx.array,
+         inputs_embeds: Optional[mx.array] = None,
+         mask: Optional[mx.array] = None,
+         cache: Optional[KVCache] = None,
+     ) -> LanguageModelOutput:
+
+         if inputs_embeds is None:
+             h = self.wte(input_ids)
+         else:
+             h = inputs_embeds
+
+         if cache is None:
+             cache = [None] * self.config.n_layers
+
+         if mask is None:
+             mask = create_attention_mask(h, cache)
+
+         for block, c in zip(self.blocks, cache):
+             h = block(h, mask, c)
+
+         h = self.ln_f(h)
+
+         if self.config.weight_tying:
+             logits = mx.matmul(h, self.wte.weight.T)
+         else:
+             logits = self.ff_out(h)
+
+         return LanguageModelOutput(logits=logits)
+
+
+ class LanguageModel(nn.Module):
+     def __init__(self, config: TextConfig):
+         super().__init__()
+         self.config = config
+         self.model_type = config.model_type
+         if self.model_type != "molmo":
+             raise ValueError(
+                 f"Model type {self.model_type} not supported. Currently only 'molmo' is supported"
+             )
+         self.model = Molmo(config)
+
+     def __call__(
+         self,
+         input_ids: mx.array,
+         inputs_embeds: Optional[mx.array] = None,
+         mask: Optional[mx.array] = None,
+         cache: Optional[KVCache] = None,
+     ) -> LanguageModelOutput:
+         outputs = self.model(input_ids, inputs_embeds, mask, cache)
+         return outputs
+
+     @staticmethod
+     def sanitize(weights):
+         # Remove unused precomputed rotary freqs
+         return {k: v for k, v in weights.items() if "rotary_emb.inv_freq" not in k}
+
+     @property
+     def layers(self):
+         return self.model.blocks
+
+     @property
+     def head_dim(self):
+         return self.config.d_model // self.config.n_heads
+
+     @property
+     def n_kv_heads(self):
+         return self.config.n_kv_heads
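
A note on the Embedding class in the molmo/language.py hunk above: it keeps the base vocabulary table and the additional-vocabulary table as separate parameters and concatenates them at lookup time, so token ids at or beyond the base vocabulary size resolve to rows of the trainable extension table. Below is a minimal sketch of that lookup; the sizes are toy values for illustration, not the package's real dimensions (the shipped config uses embedding_size=152064, additional_vocab_size=128, and d_model=3584).

import mlx.core as mx

# Toy table sizes for illustration only.
base_vocab, extra_vocab, dim = 8, 4, 6

embedding = mx.random.normal((base_vocab, dim))
new_embedding = mx.random.normal((extra_vocab, dim))

# Ids 8-11 fall past the base table and hit the extension rows,
# mirroring Embedding.__call__ in the hunk above.
token_ids = mx.array([0, 7, 8, 11])
out = mx.concatenate([embedding, new_embedding], axis=0)[token_ids]
print(out.shape)  # (4, 6)
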
nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py
@@ -0,0 +1,133 @@
+ import glob
+ import inspect
+ import json
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Dict, List, Optional, Tuple, Union
+
+ import mlx.core as mx
+ import mlx.nn as nn
+ import numpy as np
+ from huggingface_hub import snapshot_download
+
+ from .language import LanguageModel, TextConfig
+ from .vision import VisionConfig, VisionModel
+
+
+ @dataclass
+ class ModelConfig:
+     text_config: TextConfig = field(default_factory=TextConfig)
+     vision_config: VisionConfig = field(default_factory=VisionConfig)
+     model_type: str = "molmo"
+     image_feature_dropout: float = 0.0
+     image_pooling_h: int = 2
+     image_pooling_w: int = 2
+     image_pooling_2d: str = "attention"
+     image_projector: str = "mlp"
+     eos_token_id: Optional[List[int]] = None
+
+     @classmethod
+     def from_dict(cls, params):
+         return cls(
+             **{
+                 k: v
+                 for k, v in params.items()
+                 if k in inspect.signature(cls).parameters
+             }
+         )
+
+
+ class Model(nn.Module):
+     def __init__(self, config: ModelConfig):
+         super().__init__()
+         self.config = config
+         self.language_model = LanguageModel(config.text_config)
+         self.vision_tower = VisionModel(config.vision_config)
+
+     @property
+     def layers(self):
+         return self.language_model.model.layers
+
+     def __call__(
+         self,
+         input_ids: mx.array,
+         pixel_values: mx.array,
+         mask: mx.array,
+         cache=None,
+         **kwargs,
+     ) -> Dict[str, Union[mx.array, List[Tuple[mx.array, mx.array]]]]:
+         if input_ids.ndim == 1:
+             input_ids = input_ids[None, :]
+
+         batch_size, seq_len = input_ids.shape
+
+         image_input_idx = kwargs.get("image_input_idx", None)
+         image_masks = kwargs.get("image_masks", None)
+
+         if pixel_values is not None:
+             assert (
+                 image_masks is not None and image_input_idx is not None
+             ), "image_masks and image_input_idx must be provided when images are given"
+
+             dtype = self.vision_tower.image_vit.patch_embedding.weight.dtype
+             pixel_values = pixel_values.astype(dtype)
+
+             # Process images
+             if pixel_values.ndim == 3:
+                 pixel_values = mx.expand_dims(pixel_values, 0)
+                 image_masks = (
+                     mx.expand_dims(image_masks, 0) if image_masks is not None else None
+                 )
+                 image_input_idx = (
+                     mx.expand_dims(image_input_idx, 0)
+                     if image_input_idx is not None
+                     else None
+                 )
+
+             image_features, cls_embed = self.vision_tower(pixel_values, image_masks)
+
+             # Insert image features into the input embeddings
+             num_image, num_patch = image_features.shape[1:3]
+
+             assert image_input_idx.shape == (
+                 batch_size,
+                 num_image,
+                 num_patch,
+             ), f"image_input_idx.shape: {image_input_idx.shape}, expected: {(batch_size, num_image, num_patch)}"
+
+             # Insert image features into the input embeddings
+             image_features = image_features.reshape(
+                 batch_size, num_image * num_patch, -1
+             )
+             image_input_idx = image_input_idx.reshape(batch_size, num_image * num_patch)
+
+             valid = np.where(image_input_idx >= 0)[0].tolist()
+             batch_idx = mx.arange(batch_size)
+             batch_idx = mx.tile(batch_idx[:, None], [1, image_features.shape[1]])
+
+             input_embeddings = self.language_model.model.wte(input_ids)
+             input_embeddings[
+                 batch_idx[valid], image_input_idx[valid]
+             ] += image_features[valid]
+         else:
+             input_embeddings = None
+
+         # Forward pass through the language model
+         logits = self.language_model(
+             input_ids,
+             inputs_embeds=input_embeddings,
+             mask=mask,
+             cache=cache,
+         )
+
+         return logits
+
+     def sanitize(self, weights):
+         def transform_key(key):
+             if "model.transformer" in key:
+                 key = key.replace("model.transformer", "language_model.model")
+             if "model.vision_backbone" in key:
+                 key = key.replace("model.vision_backbone", "vision_tower")
+             return key
+
+         return {transform_key(k): v for k, v in weights.items()}
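
The densest step in Model.__call__ in the molmo/molmo.py hunk above is the scatter that adds the flattened image-patch features into the token-embedding sequence at the positions listed in image_input_idx, with negative entries marking padding slots that the valid indices filter out. Here is a minimal sketch of that scatter with toy shapes and no padding entries (all shapes and values are illustrative only):

import mlx.core as mx

# Toy shapes, not the real model dimensions.
batch, seq_len, dim = 1, 6, 4
num_patch = 3

input_embeddings = mx.zeros((batch, seq_len, dim))
image_features = mx.ones((batch, num_patch, dim))
image_input_idx = mx.array([[1, 2, 3]])  # target token positions

# Pair each patch with its batch row, then add features in place.
batch_idx = mx.tile(mx.arange(batch)[:, None], [1, num_patch])
input_embeddings[batch_idx, image_input_idx] += image_features
print(input_embeddings[0, :, 0])  # positions 1-3 now carry patch features

Because the update is an indexed add rather than an assignment, the text embeddings at image positions are summed with the patch features, not replaced by them.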