nexaai 1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl → 1.0.19rc9-cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai might be problematic.

Files changed (200)
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +14 -31
  5. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +15 -32
  6. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +7 -23
  7. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +8 -24
  8. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/METADATA +1 -1
  9. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/RECORD +11 -200
  10. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +0 -12
  11. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +0 -122
  12. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  13. nexaai/binds/nexa_mlx/py-lib/common/utils.py +0 -25
  14. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  15. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +0 -195
  16. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +0 -151
  17. nexaai/binds/nexa_mlx/py-lib/cv/main.py +0 -81
  18. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +0 -1736
  19. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  20. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +0 -333
  21. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +0 -617
  22. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +0 -173
  23. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  24. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +0 -399
  25. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +0 -1
  26. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +0 -244
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +0 -82
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +0 -281
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +0 -306
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +0 -116
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +0 -65
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +0 -386
  33. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +0 -105
  34. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +0 -100
  35. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +0 -460
  36. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +0 -274
  37. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  38. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +0 -149
  39. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +0 -764
  40. nexaai/binds/nexa_mlx/py-lib/llm/main.py +0 -68
  41. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  42. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +0 -174
  43. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +0 -287
  44. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +0 -127
  45. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  46. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +0 -330
  47. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +0 -1
  48. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +0 -362
  49. nexaai/binds/nexa_mlx/py-lib/sd/main.py +0 -286
  50. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +0 -306
  51. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +0 -116
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +0 -65
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +0 -385
  54. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +0 -105
  55. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +0 -100
  56. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +0 -460
  57. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +0 -274
  58. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +0 -12
  59. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +0 -276
  60. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +0 -3
  61. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +0 -572
  62. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +0 -294
  63. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +0 -276
  64. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +0 -504
  65. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +0 -320
  66. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  67. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +0 -68
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +0 -8
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +0 -193
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +0 -186
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +0 -233
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +0 -503
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +0 -202
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +0 -230
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +0 -10
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +0 -264
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +0 -472
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +0 -591
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +0 -526
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +0 -356
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +0 -8
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +0 -366
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +0 -488
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +0 -591
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +0 -8
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +0 -213
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +0 -315
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +0 -238
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +0 -2
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +0 -1038
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +0 -139
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +0 -322
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +0 -629
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +0 -1022
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +0 -9
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +0 -294
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +0 -191
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +0 -267
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +0 -8
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +0 -175
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +0 -192
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +0 -233
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +0 -9
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +0 -140
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +0 -220
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +0 -393
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +0 -293
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +0 -307
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +0 -8
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +0 -143
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +0 -509
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +0 -522
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +0 -8
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +0 -386
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +0 -138
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +0 -560
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +0 -8
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +0 -240
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +0 -153
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +0 -259
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +0 -9
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +0 -236
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +0 -256
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +0 -303
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +0 -8
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +0 -230
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +0 -160
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +0 -243
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +0 -8
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +0 -283
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +0 -8
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +0 -416
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +0 -172
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +0 -499
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +0 -8
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +0 -243
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +0 -133
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +0 -465
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +0 -10
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +0 -230
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +0 -385
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +0 -557
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +0 -526
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +0 -8
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +0 -282
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +0 -160
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +0 -242
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +0 -8
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +0 -21
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +0 -243
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +0 -71
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +0 -324
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +0 -8
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +0 -229
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +0 -161
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +0 -320
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +0 -2
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +0 -108
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +0 -490
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +0 -168
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +0 -414
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +0 -2
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +0 -104
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +0 -490
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +0 -167
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +0 -312
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +0 -117
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +0 -531
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +0 -701
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +0 -255
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +0 -303
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +0 -407
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +0 -476
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +0 -1223
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +0 -117
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +0 -531
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +0 -701
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +0 -255
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +0 -303
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +0 -407
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +0 -476
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +0 -1309
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +0 -210
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +0 -8
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +0 -62
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +0 -209
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +0 -215
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +0 -474
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +0 -39
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +0 -344
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +0 -9
  195. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +0 -70
  196. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +0 -296
  197. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +0 -160
  198. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +0 -928
  199. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/WHEEL +0 -0
  200. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/top_level.txt +0 -0
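Most of this release's churn is the deletion of the vendored nexaai/binds/nexa_mlx/py-lib/ tree (files 10-198 above), which duplicated the nexaai/mlx_backend sources; the RECORD diffstat (+11 -200) shrinks to match. Since a wheel is a zip archive, the shipped contents of the two releases can be compared with the standard library alone. A minimal sketch, assuming both wheels have been downloaded locally under their canonical filenames (adjust the paths to your setup):

    import zipfile

    OLD = "nexaai-1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl"
    NEW = "nexaai-1.0.19rc9-cp310-cp310-macosx_14_0_universal2.whl"

    def wheel_files(path: str) -> set[str]:
        # Wheels are zip archives; namelist() returns every shipped path.
        with zipfile.ZipFile(path) as wheel:
            return set(wheel.namelist())

    removed = wheel_files(OLD) - wheel_files(NEW)
    print(f"{len(removed)} paths removed in rc9")  # expected: the py-lib tree
    for name in sorted(removed):
        print("  -", name)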
nexaai/binds/nexa_mlx/py-lib/vlm/interface.py (deleted)
@@ -1,504 +0,0 @@
- from __future__ import annotations
-
- import json
- import os
- import time
- from typing import Any, List, Optional, Sequence, Tuple, Union
- import mlx.core as mx
- import codecs
- from dataclasses import dataclass
-
- # Import configs and callback types from ml.py for API alignment
- from ml import (
-     VLM as BaseVLM,
-     SamplerConfig,
-     GenerationConfig,
-     ChatMessage,
-     EmbeddingConfig,
-     TokenCallback,
-     Path,
-     Tool,  # Add Path alias for type hints
- )
-
- # Import profiling module
- from profiling import ProfilingMixin, ProfilingData, StopReason
-
- # Import from the actual mlx_vlm structure
- from .generate import generate, stream_generate, load
- from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl, ContextLengthExceededError
-
- from .generate_qwen3_vl_moe import apply_chat_template_qwen3_vl as apply_chat_template_qwen3_vl_moe
- from .generate_qwen3_vl_moe import stream_generate_qwen3_vl as stream_generate_qwen3_vl_moe
- from .generate_qwen3_vl_moe import load_qwen3_vl as load_qwen3_vl_moe
-
- from .modeling.prompt_utils import apply_chat_template
-
- # --------------------------------------------------------------------------------------
- # Updated GenerationResult to match the new structure
- # --------------------------------------------------------------------------------------
-
- @dataclass
- class GenerationResult:
-     text: str = ""
-     token: Optional[int] = None
-     logprobs: Optional[List[float]] = None
-     prompt_tokens: int = 0
-     generation_tokens: int = 0
-     total_tokens: int = 0
-     prompt_tps: float = 0.0
-     generation_tps: float = 0.0
-     peak_memory: float = 0.0
- # --------------------------------------------------------------------------------------
- # VLM (Vision-Language Model)
- # --------------------------------------------------------------------------------------
-
- class VLM(ProfilingMixin):
-     """
-     Vision-Language Models for mlx-vlm
-     API aligned with ml.py VLM abstract base class.
-     """
-
-     def __init__(
-         self,
-         model_name: Optional[str],
-         model_path: Path,
-         mmproj_path: Path,
-         context_length: int,
-         device: Optional[str] = None,
-     ) -> None:
-         # Initialize profiling mixin
-         ProfilingMixin.__init__(self)
-
-         # Check if model_path is a file; if so, use its parent directory
-         if os.path.isfile(model_path):
-             model_path = os.path.dirname(model_path)
-
-         self.model_path = model_path
-         self.model_name = model_name
-         self.mmproj_path = mmproj_path
-         self.context_length = context_length
-         self.device = device
-
-         if model_name == "qwen3vl-moe":
-             load_impl = load_qwen3_vl_moe
-         elif model_name == "qwen3vl":
-             load_impl = load_qwen3_vl
-         else:
-             load_impl = load
-
-         self.model, self.processor = load_impl(str(model_path))
-
-         # Initialize the sampler config with defaults.
-         self.sampler_config = SamplerConfig()
-
-         # Track global character position for incremental processing
-         self.global_n_past_chars = 0
-
-     def destroy(self) -> None:
-         """Destroy the model and free resources."""
-         self.model = None
-         self.processor = None
-
-     def reset(self) -> None:
-         """Reset the model state."""
-         self._reset_cache()
-         self.global_n_past_chars = 0
-
-     def _reset_cache(self) -> None:
-         """Reset the KV cache."""
-         # If the model has a cache, reset it
-         if hasattr(self.model, "cache"):
-             self.model.cache = None
-
-     # Tokenization
-     def encode(self, text: str) -> List[int]:
-         """Encode text to token IDs."""
-         return self.processor.encode(text)
-
-     def decode(self, token_ids: Sequence[int]) -> str:
-         """Decode token IDs to text."""
-         return self.processor.decode(token_ids)
-
-     # Sampler
-     def set_sampler(self, config: SamplerConfig) -> None:
-         """Set sampler configuration."""
-         self.sampler_config = config
-
-     def reset_sampler(self) -> None:
-         """Reset sampler to default configuration."""
-         self.sampler_config = None
-
-     # Generation
-     def generate(
-         self,
-         prompt: str,
-         config: Optional[GenerationConfig] = None,
-     ) -> GenerationResult:
-         """Generate text from prompt."""
-         # Start profiling
-         self._start_profiling()
-
-         gen_kwargs = {}
-         if config is not None:
-             gen_kwargs = config.__dict__.copy()
-             # Remove image_paths and audio_paths from config as they'll be handled separately
-             gen_kwargs.pop('image_paths', None)
-             gen_kwargs.pop('audio_paths', None)
-         if self.sampler_config is not None:
-             gen_kwargs.update(self.sampler_config.__dict__)
-
-         # Get image and audio paths from config
-         image_paths = config.image_paths if config else None
-         audio_paths = config.audio_paths if config else None
-
-         # Convert paths to strings for generate function
-         image_list = [str(path) for path in image_paths] if image_paths else None
-         audio_list = [str(path) for path in audio_paths] if audio_paths else None
-
-         # Extract incremental portion of the prompt (similar to llama.cpp VLM)
-         full_prompt_len = len(prompt)
-         incremental_prompt = prompt
-
-         # Apply incremental processing only for non-qwen3vl models
-         # qwen3vl requires the complete JSON conversation structure
-         if self.model_name != "qwen3vl":
-             if self.global_n_past_chars < full_prompt_len:
-                 incremental_prompt = prompt[self.global_n_past_chars:]
-             else:
-                 # No new text to process
-                 incremental_prompt = ""
-
-         # End prompt processing, start decode
-         self._prompt_end()
-         self._decode_start()
-
-         try:
-             # Start timing for generation
-             generation_start_time = time.perf_counter()
-
-             text, stats = generate(
-                 self.model,
-                 self.processor,
-                 incremental_prompt,  # Use incremental prompt instead of full prompt
-                 image=image_list,
-                 audio=audio_list,
-                 **gen_kwargs,
-             )
-
-             # End timing for generation
-             generation_end_time = time.perf_counter()
-
-             # Calculate average time per token and estimate TTFT
-             generated_tokens = stats.get("output_tokens", 0)
-             if generated_tokens > 0:
-                 total_generation_time = generation_end_time - generation_start_time
-                 avg_time_per_token = total_generation_time / generated_tokens
-                 # TTFT = prompt processing time + first token generation time
-                 # This provides a more accurate estimate than the previous approximation
-                 estimated_ttft = (self._profiling_context.prompt_end_time - self._profiling_context.prompt_start_time) + avg_time_per_token
-                 # Update the profiling context with estimated TTFT
-                 self._profiling_context.first_token_time = self._profiling_context.prompt_start_time + estimated_ttft
-                 self._profiling_context.ttft_recorded = True
-             else:
-                 # If no tokens were generated, use total generation time as TTFT
-                 self._record_ttft()
-
-             # Update profiling data
-             prompt_tokens = stats.get("input_tokens", 0)
-             self._update_prompt_tokens(prompt_tokens)
-             self._update_generated_tokens(generated_tokens)
-             self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-
-             # Update global character position (not needed for qwen3vl JSON processing)
-             if self.model_name != "qwen3vl":
-                 old_pos = self.global_n_past_chars
-                 self.global_n_past_chars = full_prompt_len + len(text)
-
-             self._decode_end()
-             self._end_profiling()
-
-             result = GenerationResult(
-                 text=text,
-                 prompt_tokens=prompt_tokens,
-                 generation_tokens=generated_tokens,
-                 total_tokens=stats.get("total_tokens", 0),
-                 prompt_tps=stats.get("prompt_tps", 0.0),
-                 generation_tps=stats.get("generation_tps", 0.0),
-                 peak_memory=stats.get("peak_memory", 0.0),
-             )
-
-             return result
-
-         except ContextLengthExceededError as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             # Re-raise the original exception without wrapping it
-             raise e
-         except Exception as e:
-             import traceback
-             traceback.print_exc()
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Generation error: {str(e)}")
-
-     def generate_stream(
-         self,
-         prompt: str,
-         config: Optional[GenerationConfig],
-         on_token: Optional[TokenCallback],
-     ) -> GenerationResult:
-         """Generate text with a streaming callback. Unified method for both text and multimodal generation."""
-
-         # Start profiling
-         self._start_profiling()
-
-         gen_kwargs = {}
-         if config is not None:
-             gen_kwargs = config.__dict__.copy()
-             # Remove image_paths and audio_paths from config as they'll be handled separately
-             gen_kwargs.pop('image_paths', None)
-             gen_kwargs.pop('audio_paths', None)
-         if self.sampler_config is not None:
-             gen_kwargs.update(self.sampler_config.__dict__)
-
-
-         # Get image and audio paths from config
-         image_paths = config.image_paths if config else None
-         audio_paths = config.audio_paths if config else None
-
-         # Convert paths to strings for stream_generate function
-         image_list = [str(path) for path in image_paths] if image_paths else None
-         audio_list = [str(path) for path in audio_paths] if audio_paths else None
-
-
-         # Extract incremental portion of the prompt (similar to llama.cpp VLM)
-         full_prompt_len = len(prompt)
-         incremental_prompt = prompt
-
-
-         # Apply incremental processing only for non-qwen3vl models
-         # qwen3vl requires the complete JSON conversation structure
-         if self.model_name != "qwen3vl":
-             if self.global_n_past_chars < full_prompt_len:
-                 incremental_prompt = prompt[self.global_n_past_chars:]
-             else:
-                 # No new text to process
-                 incremental_prompt = ""
-
-         # End prompt processing, start decode
-         self._prompt_end()
-         self._decode_start()
-
-         text = ""
-         last_result = None
-         first_token = True
-
-         if self.model_name == "qwen3vl-moe":
-             stream_generate_impl = stream_generate_qwen3_vl_moe
-         elif self.model_name == "qwen3vl":
-             stream_generate_impl = stream_generate_qwen3_vl
-         else:
-             stream_generate_impl = stream_generate
-
-         try:
-             token_count = 0
-
-             for result in stream_generate_impl(
-                 self.model,
-                 self.processor,
-                 incremental_prompt,  # Use incremental prompt instead of full prompt
-                 image=image_list,
-                 audio=audio_list,
-                 **gen_kwargs,
-             ):
-                 token_count += 1
-
-                 # Record TTFT on first token
-                 if first_token:
-                     self._record_ttft()
-                     first_token = False
-
-                 # Call the token callback if provided
-                 if on_token is not None:
-                     if not on_token(result.text):
-                         self._set_stop_reason(StopReason.ML_STOP_REASON_USER)
-                         break
-                 text += result.text
-                 last_result = result
-
-
-             # Set stop reason if not user stop
-             if self._profiling_context.stop_reason != StopReason.ML_STOP_REASON_USER:
-                 self._set_stop_reason(StopReason.ML_STOP_REASON_EOS)
-
-             # Update profiling data
-             if last_result:
-                 self._update_prompt_tokens(last_result.prompt_tokens)
-                 self._update_generated_tokens(last_result.generation_tokens)
-
-             # Update global character position (not needed for qwen3vl JSON processing)
-             if self.model_name != "qwen3vl":
-                 old_pos = self.global_n_past_chars
-                 self.global_n_past_chars = full_prompt_len + len(text)
-
-             self._decode_end()
-             self._end_profiling()
-
-             result = GenerationResult(
-                 text=text,
-                 token=last_result.token if last_result else None,
-                 logprobs=last_result.logprobs if last_result else None,
-                 prompt_tokens=last_result.prompt_tokens if last_result else 0,
-                 generation_tokens=last_result.generation_tokens if last_result else 0,
-                 total_tokens=(last_result.prompt_tokens + last_result.generation_tokens) if last_result else 0,
-                 prompt_tps=last_result.prompt_tps if last_result else 0.0,
-                 generation_tps=last_result.generation_tps if last_result else 0.0,
-                 peak_memory=last_result.peak_memory if last_result else 0.0,
-             )
-
-             return result
-
-         except ContextLengthExceededError as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             # Re-raise the original exception without wrapping it
-             raise e
-         except Exception as e:
-             import traceback
-             traceback.print_exc()
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Streaming generation error: {str(e)}")
-
-     # Legacy multimodal methods - kept for backward compatibility but delegate to unified method
-     def generate_multimodal(
-         self,
-         prompt: str,
-         image_paths: Optional[Sequence[Path]] = None,
-         audio_paths: Optional[Sequence[Path]] = None,
-         config: Optional[GenerationConfig] = None,
-     ) -> str:
-         """Generate text from prompt with multiple images and audio."""
-         # Create config with media paths if not provided
-         if config is None:
-             config = GenerationConfig()
-
-         # Update config with provided paths
-         if image_paths is not None:
-             config.image_paths = image_paths
-         if audio_paths is not None:
-             config.audio_paths = audio_paths
-
-         # Delegate to unified generate method and extract text
-         result = self.generate(prompt, config)
-         return result.text
-
-     def generate_stream_multimodal(
-         self,
-         prompt: str,
-         image_paths: Optional[Sequence[Path]] = None,
-         audio_paths: Optional[Sequence[Path]] = None,
-         config: Optional[GenerationConfig] = None,
-         on_token: Optional[TokenCallback] = None,
-     ) -> str:
-         """Generate text from prompt with multiple images and audio using a streaming callback."""
-         # Create config with media paths if not provided
-         if config is None:
-             config = GenerationConfig()
-
-         # Update config with provided paths
-         if image_paths is not None:
-             config.image_paths = image_paths
-         if audio_paths is not None:
-             config.audio_paths = audio_paths
-
-         # Delegate to unified generate_stream method and extract text
-         result = self.generate_stream(prompt, config, on_token)
-         return result.text
-
-     def get_chat_template(self, template_name: str) -> str:
-         """Get chat template by name."""
-         # This is a stub; the actual implementation depends on processor internals
-         if hasattr(self.processor, "get_chat_template"):
-             return self.processor.get_chat_template(template_name)
-         return ""
-
-     def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
-         """Apply chat template to messages with optional tools support."""
-         if hasattr(self.processor, "apply_chat_template"):
-             # Convert ChatMessage objects to dictionaries for the processor
-             messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
-
-             parsed_tools = None
-             if tools is not None and tools.strip():
-                 parsed_tools = json.loads(tools)
-
-             result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
-             return result
-         # Fallback: join messages
-         return "\n".join([f"{m.role}: {m.content}" for m in messages])
-
-     def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
-         """Apply chat template to messages with proper image/audio token insertion and optional tools support."""
-         if self.model_name == "qwen3vl":
-             return apply_chat_template_qwen3_vl(messages, num_images=num_images, num_audios=num_audios, tools=tools, enable_thinking=enable_thinking)
-         if self.model_name == "qwen3vl-moe":
-             return apply_chat_template_qwen3_vl_moe(messages, num_images=num_images, num_audios=num_audios, tools=tools, enable_thinking=enable_thinking)
-         # Convert ChatMessage objects to dictionaries for the processor
-         messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
-
-         parsed_tools = None
-         if tools is not None and tools.strip():
-             parsed_tools = json.loads(tools)
-
-         # Use the same logic as generate.py
-         return apply_chat_template(
-             self.processor,
-             self.model.config,
-             messages_dict,
-             num_images=num_images,
-             num_audios=num_audios,
-             enable_thinking=enable_thinking,
-             tools=parsed_tools
-         )
-
-     # Embeddings
-     def embed(
-         self,
-         texts: Sequence[str],
-         config: Optional[EmbeddingConfig] = None,
-     ) -> List[List[float]]:
-         """Generate embeddings for texts with profiling."""
-         # Start profiling
-         self._start_profiling()
-
-         try:
-             # If the processor/model supports embeddings, use it; otherwise, stub
-             if hasattr(self.model, "embed"):
-                 embed_kwargs = config.__dict__ if config else {}
-
-                 # End prompt processing, start decode
-                 self._prompt_end()
-                 self._decode_start()
-
-                 result = self.model.embed(texts, **embed_kwargs)
-
-                 # End timing and finalize profiling data
-                 self._update_generated_tokens(0)  # No generation in embedding
-                 self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-                 self._decode_end()
-                 self._end_profiling()
-
-                 return result
-             else:
-                 raise NotImplementedError("Embedding not supported for this model.")
-
-         except Exception as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Error generating embeddings: {str(e)}")