nexaai-1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl → nexaai-1.0.19rc8-cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic.
Files changed (196)
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc8.dist-info}/METADATA +1 -1
  5. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc8.dist-info}/RECORD +7 -196
  6. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +0 -12
  7. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +0 -122
  8. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  9. nexaai/binds/nexa_mlx/py-lib/common/utils.py +0 -25
  10. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  11. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +0 -195
  12. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +0 -151
  13. nexaai/binds/nexa_mlx/py-lib/cv/main.py +0 -81
  14. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +0 -1736
  15. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  16. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +0 -333
  17. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +0 -617
  18. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +0 -173
  19. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  20. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +0 -399
  21. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +0 -1
  22. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +0 -244
  23. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +0 -82
  24. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +0 -281
  25. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +0 -306
  26. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +0 -116
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +0 -65
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +0 -386
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +0 -105
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +0 -100
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +0 -460
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +0 -274
  33. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  34. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +0 -149
  35. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +0 -764
  36. nexaai/binds/nexa_mlx/py-lib/llm/main.py +0 -68
  37. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  38. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +0 -174
  39. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +0 -287
  40. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +0 -127
  41. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  42. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +0 -330
  43. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +0 -1
  44. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +0 -362
  45. nexaai/binds/nexa_mlx/py-lib/sd/main.py +0 -286
  46. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +0 -306
  47. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +0 -116
  48. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +0 -65
  49. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +0 -385
  50. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +0 -105
  51. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +0 -100
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +0 -460
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +0 -274
  54. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +0 -12
  55. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +0 -276
  56. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +0 -3
  57. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +0 -572
  58. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +0 -294
  59. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +0 -276
  60. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +0 -504
  61. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +0 -320
  62. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  63. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +0 -68
  64. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  65. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +0 -8
  66. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +0 -193
  67. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +0 -186
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +0 -233
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +0 -503
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +0 -202
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +0 -230
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +0 -10
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +0 -264
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +0 -472
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +0 -591
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +0 -526
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +0 -356
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +0 -8
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +0 -366
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +0 -488
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +0 -591
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +0 -8
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +0 -213
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +0 -315
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +0 -238
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +0 -2
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +0 -1038
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +0 -139
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +0 -322
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +0 -629
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +0 -1022
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +0 -9
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +0 -294
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +0 -191
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +0 -267
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +0 -8
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +0 -175
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +0 -192
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +0 -233
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +0 -9
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +0 -140
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +0 -220
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +0 -393
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +0 -293
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +0 -307
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +0 -8
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +0 -143
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +0 -509
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +0 -522
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +0 -8
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +0 -386
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +0 -138
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +0 -560
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +0 -8
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +0 -240
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +0 -153
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +0 -259
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +0 -9
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +0 -236
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +0 -256
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +0 -303
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +0 -8
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +0 -230
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +0 -160
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +0 -243
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +0 -8
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +0 -283
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +0 -8
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +0 -416
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +0 -172
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +0 -499
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +0 -8
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +0 -243
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +0 -133
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +0 -465
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +0 -10
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +0 -230
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +0 -385
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +0 -557
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +0 -526
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +0 -8
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +0 -282
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +0 -160
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +0 -242
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +0 -8
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +0 -21
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +0 -243
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +0 -71
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +0 -324
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +0 -8
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +0 -229
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +0 -161
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +0 -320
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +0 -2
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +0 -108
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +0 -490
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +0 -168
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +0 -414
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +0 -2
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +0 -104
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +0 -490
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +0 -167
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +0 -312
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +0 -117
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +0 -531
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +0 -701
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +0 -255
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +0 -303
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +0 -407
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +0 -476
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +0 -1223
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +0 -117
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +0 -531
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +0 -701
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +0 -255
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +0 -303
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +0 -407
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +0 -476
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +0 -1309
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +0 -210
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +0 -8
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +0 -62
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +0 -209
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +0 -215
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +0 -474
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +0 -39
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +0 -344
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +0 -9
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +0 -70
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +0 -296
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +0 -160
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +0 -928
  195. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc8.dist-info}/WHEEL +0 -0
  196. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc8.dist-info}/top_level.txt +0 -0
nexaai/binds/nexa_mlx/py-lib/vlm/interface.py
@@ -1,504 +0,0 @@
- from __future__ import annotations
-
- import json
- import os
- import time
- from typing import Any, List, Optional, Sequence, Tuple, Union
- import mlx.core as mx
- import codecs
- from dataclasses import dataclass
-
- # Import configs and callback types from ml.py for API alignment
- from ml import (
-     VLM as BaseVLM,
-     SamplerConfig,
-     GenerationConfig,
-     ChatMessage,
-     EmbeddingConfig,
-     TokenCallback,
-     Path,  # Path alias for type hints
-     Tool,
- )
-
- # Import profiling module
- from profiling import ProfilingMixin, ProfilingData, StopReason
-
- # Import from the actual mlx_vlm structure
- from .generate import generate, stream_generate, load
- from .generate_qwen3_vl import apply_chat_template_qwen3_vl, stream_generate_qwen3_vl, load_qwen3_vl, ContextLengthExceededError
-
- from .generate_qwen3_vl_moe import apply_chat_template_qwen3_vl as apply_chat_template_qwen3_vl_moe
- from .generate_qwen3_vl_moe import stream_generate_qwen3_vl as stream_generate_qwen3_vl_moe
- from .generate_qwen3_vl_moe import load_qwen3_vl as load_qwen3_vl_moe
-
- from .modeling.prompt_utils import apply_chat_template
-
- # --------------------------------------------------------------------------------------
- # Updated GenerationResult to match the new structure
- # --------------------------------------------------------------------------------------
-
- @dataclass
- class GenerationResult:
-     text: str = ""
-     token: Optional[int] = None
-     logprobs: Optional[List[float]] = None
-     prompt_tokens: int = 0
-     generation_tokens: int = 0
-     total_tokens: int = 0
-     prompt_tps: float = 0.0
-     generation_tps: float = 0.0
-     peak_memory: float = 0.0
- # --------------------------------------------------------------------------------------
- # VLM (Vision-Language Model)
- # --------------------------------------------------------------------------------------
-
- class VLM(ProfilingMixin):
-     """
-     Vision-Language Models for mlx-vlm.
-     API aligned with the ml.py VLM abstract base class.
-     """
-
-     def __init__(
-         self,
-         model_name: Optional[str],
-         model_path: Path,
-         mmproj_path: Path,
-         context_length: int,
-         device: Optional[str] = None,
-     ) -> None:
-         # Initialize profiling mixin
-         ProfilingMixin.__init__(self)
-
-         # Check if model_path is a file; if so, use its parent directory
-         if os.path.isfile(model_path):
-             model_path = os.path.dirname(model_path)
-
-         self.model_path = model_path
-         self.model_name = model_name
-         self.mmproj_path = mmproj_path
-         self.context_length = context_length
-         self.device = device
-
-         if model_name == "qwen3vl-moe":
-             load_impl = load_qwen3_vl_moe
-         elif model_name == "qwen3vl":
-             load_impl = load_qwen3_vl
-         else:
-             load_impl = load
-
-         self.model, self.processor = load_impl(str(model_path))
-
-         # Init sampler config with defaults.
-         self.sampler_config = SamplerConfig()
-
-         # Track global character position for incremental processing
-         self.global_n_past_chars = 0
-
-     def destroy(self) -> None:
-         """Destroy the model and free resources."""
-         self.model = None
-         self.processor = None
-
-     def reset(self) -> None:
-         """Reset the model state."""
-         self._reset_cache()
-         self.global_n_past_chars = 0
-
-     def _reset_cache(self) -> None:
-         """Reset the KV cache."""
-         # If the model has a cache, reset it
-         if hasattr(self.model, "cache"):
-             self.model.cache = None
-
-     # Tokenization
-     def encode(self, text: str) -> List[int]:
-         """Encode text to token IDs."""
-         return self.processor.encode(text)
-
-     def decode(self, token_ids: Sequence[int]) -> str:
-         """Decode token IDs to text."""
-         return self.processor.decode(token_ids)
-
-     # Sampler
-     def set_sampler(self, config: SamplerConfig) -> None:
-         """Set sampler configuration."""
-         self.sampler_config = config
-
-     def reset_sampler(self) -> None:
-         """Reset sampler to default configuration."""
-         self.sampler_config = None
-
-     # Generation
-     def generate(
-         self,
-         prompt: str,
-         config: Optional[GenerationConfig] = None,
-     ) -> GenerationResult:
-         """Generate text from prompt."""
-         # Start profiling
-         self._start_profiling()
-
-         gen_kwargs = {}
-         if config is not None:
-             gen_kwargs = config.__dict__.copy()
-             # Remove image_paths and audio_paths from config as they'll be handled separately
-             gen_kwargs.pop('image_paths', None)
-             gen_kwargs.pop('audio_paths', None)
-         if self.sampler_config is not None:
-             gen_kwargs.update(self.sampler_config.__dict__)
-
-         # Get image and audio paths from config
-         image_paths = config.image_paths if config else None
-         audio_paths = config.audio_paths if config else None
-
-         # Convert paths to strings for generate function
-         image_list = [str(path) for path in image_paths] if image_paths else None
-         audio_list = [str(path) for path in audio_paths] if audio_paths else None
-
-         # Extract incremental portion of the prompt (similar to llama.cpp VLM)
-         full_prompt_len = len(prompt)
-         incremental_prompt = prompt
-
-         # Apply incremental processing only for non-qwen3vl models;
-         # qwen3vl requires the complete JSON conversation structure
-         if self.model_name != "qwen3vl":
-             if self.global_n_past_chars < full_prompt_len:
-                 incremental_prompt = prompt[self.global_n_past_chars:]
-             else:
-                 # No new text to process
-                 incremental_prompt = ""
-
-         # End prompt processing, start decode
-         self._prompt_end()
-         self._decode_start()
-
-         try:
-             # Start timing for generation
-             generation_start_time = time.perf_counter()
-
-             text, stats = generate(
-                 self.model,
-                 self.processor,
-                 incremental_prompt,  # Use incremental prompt instead of full prompt
-                 image=image_list,
-                 audio=audio_list,
-                 **gen_kwargs,
-             )
-
-             # End timing for generation
-             generation_end_time = time.perf_counter()
-
-             # Calculate average time per token and estimate TTFT
-             generated_tokens = stats.get("output_tokens", 0)
-             if generated_tokens > 0:
-                 total_generation_time = generation_end_time - generation_start_time
-                 avg_time_per_token = total_generation_time / generated_tokens
-                 # TTFT = prompt processing time + first token generation time
-                 # This provides a more accurate estimate than the previous approximation
-                 estimated_ttft = (self._profiling_context.prompt_end_time - self._profiling_context.prompt_start_time) + avg_time_per_token
-                 # Update the profiling context with estimated TTFT
-                 self._profiling_context.first_token_time = self._profiling_context.prompt_start_time + estimated_ttft
-                 self._profiling_context.ttft_recorded = True
-             else:
-                 # If no tokens were generated, use total generation time as TTFT
-                 self._record_ttft()
-
-             # Update profiling data
-             prompt_tokens = stats.get("input_tokens", 0)
-             self._update_prompt_tokens(prompt_tokens)
-             self._update_generated_tokens(generated_tokens)
-             self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-
-             # Update global character position (not needed for qwen3vl JSON processing)
-             if self.model_name != "qwen3vl":
-                 old_pos = self.global_n_past_chars
-                 self.global_n_past_chars = full_prompt_len + len(text)
-
-             self._decode_end()
-             self._end_profiling()
-
-             result = GenerationResult(
-                 text=text,
-                 prompt_tokens=prompt_tokens,
-                 generation_tokens=generated_tokens,
-                 total_tokens=stats.get("total_tokens", 0),
-                 prompt_tps=stats.get("prompt_tps", 0.0),
-                 generation_tps=stats.get("generation_tps", 0.0),
-                 peak_memory=stats.get("peak_memory", 0.0),
-             )
-
-             return result
-
-         except ContextLengthExceededError as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             # Re-raise the original exception without wrapping it
-             raise e
-         except Exception as e:
-             import traceback
-             traceback.print_exc()
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Generation error: {str(e)}")
-
-     def generate_stream(
-         self,
-         prompt: str,
-         config: Optional[GenerationConfig],
-         on_token: Optional[TokenCallback],
-     ) -> GenerationResult:
-         """Generate text with a streaming callback. Unified method for both text and multimodal generation."""
-
-         # Start profiling
-         self._start_profiling()
-
-         gen_kwargs = {}
-         if config is not None:
-             gen_kwargs = config.__dict__.copy()
-             # Remove image_paths and audio_paths from config as they'll be handled separately
-             gen_kwargs.pop('image_paths', None)
-             gen_kwargs.pop('audio_paths', None)
-         if self.sampler_config is not None:
-             gen_kwargs.update(self.sampler_config.__dict__)
-
-
-         # Get image and audio paths from config
-         image_paths = config.image_paths if config else None
-         audio_paths = config.audio_paths if config else None
-
-         # Convert paths to strings for stream_generate function
-         image_list = [str(path) for path in image_paths] if image_paths else None
-         audio_list = [str(path) for path in audio_paths] if audio_paths else None
-
-
-         # Extract incremental portion of the prompt (similar to llama.cpp VLM)
-         full_prompt_len = len(prompt)
-         incremental_prompt = prompt
-
-
-         # Apply incremental processing only for non-qwen3vl models;
-         # qwen3vl requires the complete JSON conversation structure
-         if self.model_name != "qwen3vl":
-             if self.global_n_past_chars < full_prompt_len:
-                 incremental_prompt = prompt[self.global_n_past_chars:]
-             else:
-                 # No new text to process
-                 incremental_prompt = ""
-
-         # End prompt processing, start decode
-         self._prompt_end()
-         self._decode_start()
-
-         text = ""
-         last_result = None
-         first_token = True
-
-         if self.model_name == "qwen3vl-moe":
-             stream_generate_impl = stream_generate_qwen3_vl_moe
-         elif self.model_name == "qwen3vl":
-             stream_generate_impl = stream_generate_qwen3_vl
-         else:
-             stream_generate_impl = stream_generate
-
-         try:
-             token_count = 0
-
-             for result in stream_generate_impl(
-                 self.model,
-                 self.processor,
-                 incremental_prompt,  # Use incremental prompt instead of full prompt
-                 image=image_list,
-                 audio=audio_list,
-                 **gen_kwargs,
-             ):
-                 token_count += 1
-
-                 # Record TTFT on first token
-                 if first_token:
-                     self._record_ttft()
-                     first_token = False
-
-                 # Call the token callback if provided
-                 if on_token is not None:
-                     if not on_token(result.text):
-                         self._set_stop_reason(StopReason.ML_STOP_REASON_USER)
-                         break
-                 text += result.text
-                 last_result = result
-
-
-             # Set stop reason if not user stop
-             if self._profiling_context.stop_reason != StopReason.ML_STOP_REASON_USER:
-                 self._set_stop_reason(StopReason.ML_STOP_REASON_EOS)
-
-             # Update profiling data
-             if last_result:
-                 self._update_prompt_tokens(last_result.prompt_tokens)
-                 self._update_generated_tokens(last_result.generation_tokens)
-
-             # Update global character position (not needed for qwen3vl JSON processing)
-             if self.model_name != "qwen3vl":
-                 old_pos = self.global_n_past_chars
-                 self.global_n_past_chars = full_prompt_len + len(text)
-
-             self._decode_end()
-             self._end_profiling()
-
-             result = GenerationResult(
-                 text=text,
-                 token=last_result.token if last_result else None,
-                 logprobs=last_result.logprobs if last_result else None,
-                 prompt_tokens=last_result.prompt_tokens if last_result else 0,
-                 generation_tokens=last_result.generation_tokens if last_result else 0,
-                 total_tokens=(last_result.prompt_tokens + last_result.generation_tokens) if last_result else 0,
-                 prompt_tps=last_result.prompt_tps if last_result else 0.0,
-                 generation_tps=last_result.generation_tps if last_result else 0.0,
-                 peak_memory=last_result.peak_memory if last_result else 0.0,
-             )
-
-             return result
-
-         except ContextLengthExceededError as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             # Re-raise the original exception without wrapping it
-             raise e
-         except Exception as e:
-             import traceback
-             traceback.print_exc()
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Streaming generation error: {str(e)}")
-
-     # Legacy multimodal methods - kept for backward compatibility but delegate to the unified methods
-     def generate_multimodal(
-         self,
-         prompt: str,
-         image_paths: Optional[Sequence[Path]] = None,
-         audio_paths: Optional[Sequence[Path]] = None,
-         config: Optional[GenerationConfig] = None,
-     ) -> str:
-         """Generate text from prompt with multiple images and audio."""
-         # Create config with media paths if not provided
-         if config is None:
-             config = GenerationConfig()
-
-         # Update config with provided paths
-         if image_paths is not None:
-             config.image_paths = image_paths
-         if audio_paths is not None:
-             config.audio_paths = audio_paths
-
-         # Delegate to unified generate method and extract text
-         result = self.generate(prompt, config)
-         return result.text
-
-     def generate_stream_multimodal(
-         self,
-         prompt: str,
-         image_paths: Optional[Sequence[Path]] = None,
-         audio_paths: Optional[Sequence[Path]] = None,
-         config: Optional[GenerationConfig] = None,
-         on_token: Optional[TokenCallback] = None,
-     ) -> str:
-         """Generate text from prompt with multiple images and audio using a streaming callback."""
-         # Create config with media paths if not provided
-         if config is None:
-             config = GenerationConfig()
-
-         # Update config with provided paths
-         if image_paths is not None:
-             config.image_paths = image_paths
-         if audio_paths is not None:
-             config.audio_paths = audio_paths
-
-         # Delegate to unified generate_stream method and extract text
-         result = self.generate_stream(prompt, config, on_token)
-         return result.text
-
-     def get_chat_template(self, template_name: str) -> str:
-         """Get chat template by name."""
-         # This is a stub; the actual implementation depends on processor internals
-         if hasattr(self.processor, "get_chat_template"):
-             return self.processor.get_chat_template(template_name)
-         return ""
-
-     def apply_chat_template(self, messages: Sequence[ChatMessage], tools: Optional[str] = None, enable_thinking: bool = True) -> str:
-         """Apply chat template to messages with optional tools support."""
-         if hasattr(self.processor, "apply_chat_template"):
-             # Convert ChatMessage objects to dictionaries for the processor
-             messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
-
-             parsed_tools = None
-             if tools is not None and tools.strip():
-                 parsed_tools = json.loads(tools)
-
-             result = apply_chat_template(self.processor, self.model.config, messages_dict, add_generation_prompt=True, enable_thinking=enable_thinking, tools=parsed_tools)
-             return result
-         # Fallback: join messages
-         return "\n".join([f"{m.role}: {m.content}" for m in messages])
-
-     def apply_chat_template_with_media(self, messages: Sequence[ChatMessage], num_images: int = 0, num_audios: int = 0, tools: Optional[str] = None, enable_thinking: bool = True) -> str:
-         """Apply chat template to messages with proper image/audio token insertion and optional tools support."""
-         if self.model_name == "qwen3vl":
-             return apply_chat_template_qwen3_vl(messages, num_images=num_images, num_audios=num_audios, tools=tools, enable_thinking=enable_thinking)
-         if self.model_name == "qwen3vl-moe":
-             return apply_chat_template_qwen3_vl_moe(messages, num_images=num_images, num_audios=num_audios, tools=tools, enable_thinking=enable_thinking)
-         # Convert ChatMessage objects to dictionaries for the processor
-         messages_dict = [{"role": msg.role, "content": msg.content} for msg in messages]
-
-         parsed_tools = None
-         if tools is not None and tools.strip():
-             parsed_tools = json.loads(tools)
-
-         # Use the same logic as generate.py
-         return apply_chat_template(
-             self.processor,
-             self.model.config,
-             messages_dict,
-             num_images=num_images,
-             num_audios=num_audios,
-             enable_thinking=enable_thinking,
-             tools=parsed_tools
-         )
-
-     # Embeddings
-     def embed(
-         self,
-         texts: Sequence[str],
-         config: Optional[EmbeddingConfig] = None,
-     ) -> List[List[float]]:
-         """Generate embeddings for texts with profiling."""
-         # Start profiling
-         self._start_profiling()
-
-         try:
-             # If the model supports embeddings, use it; otherwise, raise
-             if hasattr(self.model, "embed"):
-                 embed_kwargs = config.__dict__ if config else {}
-
-                 # End prompt processing, start decode
-                 self._prompt_end()
-                 self._decode_start()
-
-                 result = self.model.embed(texts, **embed_kwargs)
-
-                 # End timing and finalize profiling data
-                 self._update_generated_tokens(0)  # No generation in embedding
-                 self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-                 self._decode_end()
-                 self._end_profiling()
-
-                 return result
-             else:
-                 raise NotImplementedError("Embedding not supported for this model.")
-
-         except Exception as e:
-             self._set_stop_reason(StopReason.ML_STOP_REASON_UNKNOWN)
-             self._decode_end()
-             self._end_profiling()
-             raise RuntimeError(f"Error generating embeddings: {str(e)}")
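For context on the removed interface: both VLM.generate and VLM.generate_stream avoid re-encoding the whole multi-turn prompt on every call. A character offset, global_n_past_chars, records how much of the rendered chat template has already been fed to the model; only the unseen suffix is submitted, and after generation the offset is advanced past the prompt plus the model's reply (qwen3vl is exempt because it needs the complete JSON conversation structure). Below is a minimal, self-contained sketch of that bookkeeping; all names other than global_n_past_chars are illustrative, not part of the package.

class IncrementalPromptTracker:
    """Mirrors the global_n_past_chars bookkeeping from the removed
    vlm/interface.py (class and method names here are hypothetical)."""

    def __init__(self) -> None:
        self.global_n_past_chars = 0

    def next_chunk(self, full_prompt: str) -> str:
        # Only the suffix the model has not seen needs to be encoded;
        # the KV cache is assumed to cover everything before the offset.
        if self.global_n_past_chars < len(full_prompt):
            return full_prompt[self.global_n_past_chars:]
        return ""  # No new text to process

    def advance(self, full_prompt: str, generated_text: str) -> None:
        # After a turn, the cache also holds the model's own reply,
        # so the offset moves past the prompt plus the generated text.
        self.global_n_past_chars = len(full_prompt) + len(generated_text)

tracker = IncrementalPromptTracker()
assert tracker.next_chunk("user: hi\n") == "user: hi\n"
tracker.advance("user: hi\n", "assistant: hello")
# Next turn: only the newly appended text is returned for encoding.
assert tracker.next_chunk("user: hi\nassistant: hello\nuser: and you?\n") == "\nuser: and you?\n"

The trade-off is that correctness depends on the KV cache and the offset staying in sync, which is why the removed reset() clears both together.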