nexaai 1.0.19rc7__cp310-cp310-macosx_14_0_universal2.whl → 1.0.19rc9__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexaai might be problematic. Click here for more details.

Files changed (200) hide show
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +14 -31
  5. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +15 -32
  6. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +7 -23
  7. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +8 -24
  8. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/METADATA +1 -1
  9. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/RECORD +11 -200
  10. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +0 -12
  11. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +0 -122
  12. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  13. nexaai/binds/nexa_mlx/py-lib/common/utils.py +0 -25
  14. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  15. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +0 -195
  16. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +0 -151
  17. nexaai/binds/nexa_mlx/py-lib/cv/main.py +0 -81
  18. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +0 -1736
  19. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  20. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +0 -333
  21. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +0 -617
  22. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +0 -173
  23. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  24. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +0 -399
  25. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +0 -1
  26. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +0 -244
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +0 -82
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +0 -281
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +0 -306
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +0 -116
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +0 -65
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +0 -386
  33. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +0 -105
  34. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +0 -100
  35. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +0 -460
  36. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +0 -274
  37. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  38. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +0 -149
  39. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +0 -764
  40. nexaai/binds/nexa_mlx/py-lib/llm/main.py +0 -68
  41. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  42. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +0 -174
  43. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +0 -287
  44. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +0 -127
  45. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  46. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +0 -330
  47. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +0 -1
  48. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +0 -362
  49. nexaai/binds/nexa_mlx/py-lib/sd/main.py +0 -286
  50. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +0 -306
  51. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +0 -116
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +0 -65
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +0 -385
  54. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +0 -105
  55. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +0 -100
  56. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +0 -460
  57. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +0 -274
  58. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +0 -12
  59. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +0 -276
  60. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +0 -3
  61. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +0 -572
  62. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +0 -294
  63. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +0 -276
  64. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +0 -504
  65. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +0 -320
  66. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  67. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +0 -68
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +0 -8
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +0 -193
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +0 -186
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +0 -233
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +0 -503
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +0 -202
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +0 -230
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +0 -10
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +0 -264
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +0 -472
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +0 -591
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +0 -526
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +0 -356
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +0 -8
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +0 -366
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +0 -488
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +0 -591
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +0 -8
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +0 -213
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +0 -315
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +0 -238
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +0 -2
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +0 -1038
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +0 -139
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +0 -322
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +0 -629
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +0 -1022
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +0 -9
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +0 -294
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +0 -191
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +0 -267
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +0 -8
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +0 -175
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +0 -192
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +0 -233
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +0 -9
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +0 -140
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +0 -220
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +0 -393
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +0 -293
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +0 -307
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +0 -8
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +0 -143
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +0 -509
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +0 -522
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +0 -8
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +0 -386
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +0 -138
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +0 -560
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +0 -8
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +0 -240
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +0 -153
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +0 -259
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +0 -9
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +0 -236
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +0 -256
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +0 -303
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +0 -8
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +0 -230
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +0 -160
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +0 -243
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +0 -8
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +0 -283
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +0 -8
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +0 -416
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +0 -172
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +0 -499
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +0 -8
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +0 -243
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +0 -133
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +0 -465
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +0 -10
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +0 -230
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +0 -385
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +0 -557
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +0 -526
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +0 -8
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +0 -282
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +0 -160
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +0 -242
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +0 -8
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +0 -21
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +0 -243
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +0 -71
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +0 -324
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +0 -8
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +0 -229
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +0 -161
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +0 -320
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +0 -2
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +0 -108
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +0 -490
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +0 -168
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +0 -414
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +0 -2
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +0 -104
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +0 -490
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +0 -167
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +0 -312
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +0 -117
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +0 -531
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +0 -701
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +0 -255
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +0 -303
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +0 -407
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +0 -476
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +0 -1223
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +0 -117
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +0 -531
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +0 -701
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +0 -255
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +0 -303
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +0 -407
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +0 -476
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +0 -1309
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +0 -210
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +0 -8
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +0 -62
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +0 -209
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +0 -215
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +0 -474
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +0 -39
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +0 -344
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +0 -9
  195. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +0 -70
  196. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +0 -296
  197. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +0 -160
  198. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +0 -928
  199. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/WHEEL +0 -0
  200. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/top_level.txt +0 -0
@@ -1,244 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import (
4
- List,
5
- Optional,
6
- )
7
-
8
- import mlx.core as mx
9
- import numpy as np
10
- from PIL import Image as PILImage
11
- import mlx.nn as nn
12
- import os
13
-
14
- from .stable_diffusion import StableDiffusion, StableDiffusionXL
15
-
16
-
17
class Image:
    """In-memory image stored as a flat list of pixel values.

    Pixels are kept row-major in (height, width, channels) order. The PIL
    conversions map between 8-bit samples and floats scaled by 1/255.
    """

    def __init__(self, data: List[float], width: int, height: int, channels: int) -> None:
        """Store the flat pixel ``data`` together with its dimensions."""
        self.data = data
        self.width = width
        self.height = height
        self.channels = channels

    @classmethod
    def from_numpy(cls, array: np.ndarray) -> 'Image':
        """Create an Image from a numpy array shaped (H, W, C) or (H, W).

        A 2-D array is treated as a single-channel image.

        Raises:
            ValueError: if ``array`` is neither 2-D nor 3-D.
        """
        if array.ndim == 2:
            # Grayscale input: add the channel axis the rest of the class expects.
            array = array[:, :, np.newaxis]
        if array.ndim != 3:
            raise ValueError(
                f"Expected an array shaped (H, W, C) or (H, W), got shape {array.shape}")
        height, width, channels = array.shape
        return cls(array.flatten().tolist(), width, height, channels)

    @classmethod
    def from_pil(cls, pil_image: "PILImage.Image") -> 'Image':
        """Create an Image from a PIL image, scaling samples by 1/255."""
        array = np.array(pil_image).astype(np.float32) / 255.0
        return cls.from_numpy(array)

    def to_numpy(self) -> np.ndarray:
        """Return the pixel data as a numpy array shaped (H, W, C)."""
        return np.array(self.data).reshape(self.height, self.width, self.channels)

    def _to_uint8(self) -> np.ndarray:
        """Return the pixels as a (H, W, C) uint8 array.

        Values are clipped to [0, 1] so out-of-range floats cannot wrap
        around, and rounded (not truncated) so that an 8-bit sample that
        went through ``from_pil`` maps back to exactly the same level.
        """
        scaled = np.clip(self.to_numpy(), 0.0, 1.0) * 255.0
        return np.rint(scaled).astype(np.uint8)

    def to_pil(self) -> "PILImage.Image":
        """Convert to a PIL image with 8-bit samples."""
        pixels = self._to_uint8()
        if self.channels == 1:
            # PIL expects a 2-D array for single-channel images.
            pixels = pixels[:, :, 0]
        return PILImage.fromarray(pixels)
46
-
47
-
48
class ImageSamplerConfig:
    """Sampler settings for one generation call.

    The defaults (4 steps, guidance scale 0.0) are the values this module
    uses for SDXL Turbo. ``seed`` defaults to -1.
    """

    def __init__(
        self,
        method: str = "ddim",
        steps: int = 4,
        guidance_scale: float = 0.0,
        eta: float = 0.0,
        seed: int = -1,
    ) -> None:
        """Record the sampler options on the instance unchanged."""
        self.method, self.steps = method, steps
        self.guidance_scale, self.eta = guidance_scale, eta
        self.seed = seed
63
-
64
-
65
class ImageGenerationConfig:
    """Per-request settings for an image generation call.

    Arguments are stored on the instance as given, with two exceptions: a
    falsy ``negative_prompts`` is stored as ``""`` and a falsy
    ``sampler_config`` is replaced by a default ``ImageSamplerConfig()``.
    """

    def __init__(
        self,
        prompts: "str | List[str]",
        negative_prompts: "str | List[str] | None" = None,
        height: int = 512,
        width: int = 512,
        sampler_config: "Optional[ImageSamplerConfig]" = None,
        lora_id: int = -1,  # accepted for compatibility; not used
        init_image: "Optional[Image]" = None,
        strength: float = 1.0,
        n_images: int = 1,
        n_rows: int = 1,
        decoding_batch_size: int = 1,
    ) -> None:
        """Normalise the optional arguments and record every setting."""
        if not negative_prompts:
            negative_prompts = ""
        if not sampler_config:
            sampler_config = ImageSamplerConfig()

        self.prompts = prompts
        self.negative_prompts = negative_prompts
        self.height = height
        self.width = width
        self.sampler_config = sampler_config
        self.lora_id = lora_id
        self.init_image = init_image
        self.strength = strength
        self.n_images = n_images
        self.n_rows = n_rows
        self.decoding_batch_size = decoding_batch_size
92
-
93
-
94
class ImageGen:
    """Stable Diffusion / SDXL Turbo image generator with lazy model loading.

    The model is not loaded in ``__init__``; it is loaded on the first call
    to ``txt2img`` or ``img2img`` using the ``float16`` and ``quantize``
    options given at construction time.
    """

    def __init__(
        self,
        model_path: str,
        scheduler_config_path: Optional[str] = None,
        device: Optional[str] = None,
        float16: bool = True,
        quantize: bool = False,
    ) -> None:
        """Record the load options; no model is loaded here.

        ``device`` is accepted but not stored or used.
        """
        self.model_path = model_path
        self.scheduler_config_path = scheduler_config_path
        self.float16 = float16
        self.quantize = quantize
        self.model = None

    @staticmethod
    def _is_xl_model(model_path: str) -> bool:
        """Return True when the path/repo name contains "xl" or "turbo"."""
        lowered = model_path.lower()
        return "xl" in lowered or "turbo" in lowered

    @staticmethod
    def load_model(model_path: str, float16: bool = True, quantize: bool = False) -> "StableDiffusion":
        """Load a model from a local path or a HuggingFace repo name.

        ``StableDiffusionXL`` is used when the name contains "xl" or
        "turbo" (case-insensitive), ``StableDiffusion`` otherwise. Local
        paths and repo names are handled identically.

        When ``quantize`` is true, the Linear layers of the text encoder(s)
        are quantized and the UNet is quantized with group_size=32, bits=8.
        """
        is_xl = ImageGen._is_xl_model(model_path)
        model_cls = StableDiffusionXL if is_xl else StableDiffusion
        model = model_cls(model_path, float16=float16)

        if quantize:
            def only_linear(_, module):
                return isinstance(module, nn.Linear)

            # SDXL models carry two text encoders, the others a single one.
            if is_xl:
                text_encoders = (model.text_encoder_1, model.text_encoder_2)
            else:
                text_encoders = (model.text_encoder,)
            for encoder in text_encoders:
                nn.quantize(encoder, class_predicate=only_linear)
            nn.quantize(model.unet, group_size=32, bits=8)
        return model

    def _ensure_model(self):
        """Load the model on first use, honouring the instance's load options.

        Raises:
            RuntimeError: if no model is available after loading.
        """
        if self.model is None:
            # Pass the stored options through; calling load_model with the
            # path alone would silently fall back to its defaults.
            self.model = self.load_model(
                self.model_path, float16=self.float16, quantize=self.quantize)
        if self.model is None:
            raise RuntimeError("Model not loaded")
        return self.model

    @staticmethod
    def _negative_prompt(config: "ImageGenerationConfig") -> str:
        """Return the single negative prompt to use ("" when none is set)."""
        negatives = config.negative_prompts
        if not negatives:
            return ""
        # Only one image is generated, so only the first entry of a list is used.
        return negatives if isinstance(negatives, str) else negatives[0]

    @staticmethod
    def _seed(sampler_config: "ImageSamplerConfig") -> Optional[int]:
        """Map a negative seed to None; non-negative seeds pass through."""
        return sampler_config.seed if sampler_config.seed >= 0 else None

    @staticmethod
    def _prepare_init_image(image: "Image", width: int, height: int) -> "mx.array":
        """Convert ``image`` to an RGB array resized to ``width`` x ``height``.

        NOTE(review): the scaling to [-1, 1] and the (H, W, 3) layout follow
        the mlx-examples image2image preprocessing; confirm that the bundled
        ``generate_latents_from_image`` expects the same input.
        """
        pil_image = image.to_pil().convert("RGB")
        if pil_image.size != (width, height):
            pil_image = pil_image.resize((width, height), PILImage.LANCZOS)
        pixels = mx.array(np.array(pil_image))
        return (pixels.astype(mx.float32) / 255) * 2 - 1

    def _finish(self, latents_generator, clear_cache: bool) -> "Image":
        """Run the latent generator to completion and decode the last latents.

        Raises:
            RuntimeError: if the generator yields nothing.
        """
        final_latents = None
        for final_latents in latents_generator:
            # Evaluate every step so the computation is materialised as it goes.
            mx.eval(final_latents)
        if final_latents is None:
            raise RuntimeError("No latents generated")

        decoded_image = self.model.decode(final_latents)
        mx.eval(decoded_image)

        # A single image is generated, so drop the leading batch axis.
        image_array = np.array(decoded_image.squeeze(0))

        if clear_cache:
            mx.clear_cache()

        return Image.from_numpy(image_array)

    def txt2img(self, prompt: str, config: "ImageGenerationConfig", clear_cache: bool = True) -> "Image":
        """Generate one image from a text prompt.

        Steps, guidance scale and seed come from ``config.sampler_config``;
        a negative seed means "no fixed seed". When ``clear_cache`` is true,
        ``mx.clear_cache()`` is called after decoding.

        Raises:
            RuntimeError: if the model cannot be loaded or no latents are produced.
        """
        model = self._ensure_model()
        sampler_config = config.sampler_config

        latents_generator = model.generate_latents(
            prompt,
            n_images=1,
            num_steps=sampler_config.steps,
            cfg_weight=sampler_config.guidance_scale,
            negative_text=self._negative_prompt(config),
            seed=self._seed(sampler_config),
        )
        return self._finish(latents_generator, clear_cache)

    def img2img(self, init_image: "Image", prompt: str, config: "ImageGenerationConfig", clear_cache: bool = True) -> "Image":
        """Generate one image from an initial image and a text prompt.

        ``init_image`` is resized to ``config.width`` x ``config.height``
        before encoding; ``config.strength`` is forwarded to the model.

        Raises:
            RuntimeError: if the model cannot be loaded or no latents are produced.
        """
        model = self._ensure_model()
        sampler_config = config.sampler_config

        img_tensor = self._prepare_init_image(
            init_image, config.width, config.height)

        latents_generator = model.generate_latents_from_image(
            img_tensor,
            prompt,
            n_images=1,
            strength=config.strength,
            num_steps=sampler_config.steps,
            cfg_weight=sampler_config.guidance_scale,
            negative_text=self._negative_prompt(config),
            seed=self._seed(sampler_config),
        )
        return self._finish(latents_generator, clear_cache)
@@ -1,82 +0,0 @@
1
- from __future__ import annotations
2
- import os
3
- from typing import Optional
4
-
5
- from ml import ImageGenCreateInput, ImageGenerationConfig, ImageGenImg2ImgInput, ImageGenTxt2ImgInput, ImageGenOutput
6
- from profiling import ProfilingMixin, StopReason
7
-
8
- from .generate_sd import ImageGen as SDImageGen, Image, ImageGenerationConfig as SDImageGenerationConfig, ImageSamplerConfig
9
-
10
-
11
class ImageGen(ProfilingMixin):
    """Public image-generation interface backed by the Stable Diffusion generator.

    Translates the ``ml`` input structures into the internal configuration,
    runs the generator and writes the result to ``input.output_path``.
    """

    sd_gen: Optional[SDImageGen] = None

    def __init__(self, input: ImageGenCreateInput):
        """Create the underlying generator for ``input.model_path``."""
        self.sd_gen = SDImageGen(model_path=input.model_path)

    def destroy(self) -> None:
        """Drop the reference to the underlying generator."""
        self.sd_gen = None

    def close(self) -> None:
        """Alias of ``destroy()``; the command line entry point calls ``close()``."""
        self.destroy()

    @staticmethod
    def _check_dimensions(height: int, width: int) -> None:
        """Reject dimensions that are not multiples of 16.

        Raises ``AssertionError`` explicitly, not through an ``assert``
        statement, so the check still runs under ``python -O`` while callers
        see the same exception type and message as before.
        """
        if height % 16 != 0:
            raise AssertionError(
                f"Height must be divisible by 16 ({height}/16={height/16})")
        if width % 16 != 0:
            raise AssertionError(
                f"Width must be divisible by 16 ({width}/16={width/16})")

    def _require_generator(self) -> SDImageGen:
        """Return the generator, or raise RuntimeError after ``destroy()``."""
        if self.sd_gen is None:
            raise RuntimeError("Model not loaded")
        return self.sd_gen

    @staticmethod
    def _build_config(input, init_image: Optional[Image] = None) -> SDImageGenerationConfig:
        """Translate the public request config into the internal one.

        Only steps, guidance scale and seed are copied from the request's
        sampler config.
        """
        request = input.config
        return SDImageGenerationConfig(
            prompts=input.prompt,
            negative_prompts=request.negative_prompts,
            height=request.height,
            width=request.width,
            sampler_config=ImageSamplerConfig(
                steps=request.sampler_config.steps,
                guidance_scale=request.sampler_config.guidance_scale,
                seed=request.sampler_config.seed,
            ),
            init_image=init_image,
            strength=request.strength,
        )

    @staticmethod
    def _save(result_image: Image, output_path: str) -> ImageGenOutput:
        """Write the image to ``output_path``, creating its parent directory."""
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        result_image.to_pil().save(output_path)
        return ImageGenOutput(output_image_path=output_path)

    def txt2img(self, input: ImageGenTxt2ImgInput) -> ImageGenOutput:
        """Generate an image from ``input.prompt`` and save it.

        Raises:
            AssertionError: if height or width is not divisible by 16.
            RuntimeError: if the generator has been destroyed.
        """
        self._check_dimensions(input.config.height, input.config.width)
        generator = self._require_generator()

        internal_config = self._build_config(input)
        result_image = generator.txt2img(input.prompt, internal_config)
        return self._save(result_image, input.output_path)

    def img2img(self, input: ImageGenImg2ImgInput) -> ImageGenOutput:
        """Generate an image from an initial image file and a prompt, and save it.

        Raises:
            AssertionError: if height or width is not divisible by 16.
            RuntimeError: if the generator has been destroyed.
        """
        # Imported here because this module has no top-level PIL import.
        from PIL import Image as PILImage

        self._check_dimensions(input.config.height, input.config.width)
        generator = self._require_generator()

        # ``init_image_path`` is a file path, while ``Image.from_pil`` expects
        # a PIL image, so open the file first. Converting to RGB gives the
        # (H, W, 3) layout regardless of the file's colour mode.
        with PILImage.open(input.init_image_path) as pil_image:
            init_image = Image.from_pil(pil_image.convert("RGB"))

        internal_config = self._build_config(input, init_image=init_image)
        result_image = generator.img2img(
            init_image, input.prompt, internal_config)
        return self._save(result_image, input.output_path)
@@ -1,281 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Command line interface for text-to-image generation using MLX backend.
4
- """
5
-
6
- import argparse
7
- import sys
8
- import os
9
- from pathlib import Path
10
- from typing import Optional
11
-
12
- # Add the parent directory to the path to import the interface
13
- sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
-
15
- from interface import ImageGen, ImageSamplerConfig
16
- from ml import (
17
- ImageGenCreateInput,
18
- ImageGenTxt2ImgInput,
19
- ImageGenerationConfig,
20
- ImageSamplerConfig as MLImageSamplerConfig,
21
- SchedulerConfig,
22
- ModelConfig
23
- )
24
-
25
-
26
def create_default_config() -> ImageGenerationConfig:
    """Build the default 512x512 generation config.

    Uses the DDIM sampler with the SDXL Turbo values this script uses
    elsewhere (4 steps, no guidance). The prompt list holds one empty
    placeholder string.
    """
    sampler_options = {
        "method": "ddim",
        "steps": 4,  # SDXL Turbo optimized
        "guidance_scale": 0.0,  # SDXL Turbo works well with no guidance
        "eta": 0.0,
        "seed": -1,
    }
    scheduler_options = {
        "type": "ddim",
        "num_train_timesteps": 1000,
        "steps_offset": 0,
        "beta_start": 0.00085,
        "beta_end": 0.012,
        "beta_schedule": "scaled_linear",
        "prediction_type": "epsilon",
        "timestep_type": "discrete",
        "timestep_spacing": "linspace",
        "interpolation_type": "linear",
    }

    default_sampler = MLImageSamplerConfig(**sampler_options)
    default_scheduler = SchedulerConfig(**scheduler_options)

    return ImageGenerationConfig(
        prompts=[""],  # placeholder; replaced by user input
        sampler_config=default_sampler,
        scheduler_config=default_scheduler,
        strength=1.0,
        negative_prompts=None,
        height=512,
        width=512,
    )
58
-
59
-
60
def parse_arguments(argv: Optional[list] = None):
    """Parse command line arguments.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the parsed options.

    The short option for ``--height`` is ``-H``. ``-h`` is reserved by
    argparse for ``--help``; registering it a second time raises a
    "conflicting option string" error while the parser is being built.
    """
    parser = argparse.ArgumentParser(
        description="Generate images from text prompts using MLX backend",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py "a beautiful sunset over mountains"
  python main.py "a cat sitting on a chair" --output output.png --width 1024 --height 1024
  python main.py "a futuristic city" --model-path ./models/sdxl-turbo --steps 8 --seed 42
        """
    )

    # Required arguments
    parser.add_argument(
        "prompt",
        help="Text prompt for image generation"
    )

    # Optional arguments
    parser.add_argument(
        "--output", "-o",
        type=str,
        help="Output image path (default: generated_image.png)"
    )

    parser.add_argument(
        "--model-path", "-m",
        type=str,
        default="stabilityai/sdxl-turbo",
        help="Path to the model or HuggingFace model name (default: stabilityai/sdxl-turbo)"
    )

    parser.add_argument(
        "--width", "-w",
        type=int,
        default=512,
        help="Image width (must be divisible by 16, default: 512)"
    )

    parser.add_argument(
        "--height", "-H",  # not "-h": that short flag belongs to --help
        type=int,
        default=512,
        help="Image height (must be divisible by 16, default: 512)"
    )

    parser.add_argument(
        "--steps", "-s",
        type=int,
        default=4,
        help="Number of denoising steps (default: 4 for SDXL Turbo)"
    )

    parser.add_argument(
        "--guidance-scale", "-g",
        type=float,
        default=0.0,
        help="Guidance scale (default: 0.0 for SDXL Turbo)"
    )

    parser.add_argument(
        "--seed",
        type=int,
        default=-1,
        help="Random seed (-1 for random, default: -1)"
    )

    parser.add_argument(
        "--negative-prompt", "-n",
        type=str,
        help="Negative prompt to avoid certain elements"
    )

    parser.add_argument(
        "--device-id",
        type=str,
        help="Device ID to use (default: auto-detect)"
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging"
    )

    return parser.parse_args(argv)
147
-
148
-
149
def validate_arguments(args):
    """Check the parsed options and raise ValueError on the first bad one.

    Checks run in this order: width and height must be multiples of 16,
    steps must be positive, guidance scale must not be negative.
    """
    def reject_if(is_invalid, message):
        if is_invalid:
            raise ValueError(message)

    # Both dimensions must be multiples of 16.
    reject_if(args.width % 16 != 0,
              f"Width must be divisible by 16, got {args.width}")
    reject_if(args.height % 16 != 0,
              f"Height must be divisible by 16, got {args.height}")

    # At least one denoising step is required.
    reject_if(args.steps <= 0,
              f"Steps must be positive, got {args.steps}")

    # Zero guidance is allowed; negative guidance is not.
    reject_if(args.guidance_scale < 0,
              f"Guidance scale must be non-negative, got {args.guidance_scale}")
164
-
165
-
166
def main():
    """Run the command line interface: parse, validate, generate, save.

    Exits with status 1 on Ctrl-C or on any error. Errors are printed to
    stderr, with a traceback when ``--verbose`` was given.
    """
    # Set before the try block so the error handler and the cleanup can run
    # even when argument parsing itself fails.
    verbose = False
    image_gen = None
    try:
        args = parse_arguments()
        verbose = args.verbose

        validate_arguments(args)

        # Set up output path
        output_path = Path(args.output) if args.output else Path("generated_image.png")

        # Ensure output directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        if verbose:
            print("Initializing image generation...")
            print(f"Model: {args.model_path}")
            print(f"Prompt: {args.prompt}")
            print(f"Output: {output_path}")
            print(f"Dimensions: {args.width}x{args.height}")
            print(f"Steps: {args.steps}")
            print(f"Guidance scale: {args.guidance_scale}")
            print(f"Seed: {args.seed}")
            if args.negative_prompt:
                print(f"Negative prompt: {args.negative_prompt}")

        # Create model configuration
        model_config = ModelConfig(
            name="sdxl-turbo",
            version="1.0",
            description="SDXL Turbo model for fast image generation"
        )

        # Create image generator
        create_input = ImageGenCreateInput(
            model_name="sdxl-turbo",
            model_path=args.model_path,
            config=model_config,
            scheduler_config_path="",  # Not used for SDXL Turbo
            plugin_id="mlx",
            device_id=args.device_id
        )

        image_gen = ImageGen(create_input)

        # Create generation configuration
        sampler_config = MLImageSamplerConfig(
            method="ddim",
            steps=args.steps,
            guidance_scale=args.guidance_scale,
            eta=0.0,
            seed=args.seed
        )

        scheduler_config = SchedulerConfig(
            type="ddim",
            num_train_timesteps=1000,
            steps_offset=0,
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            prediction_type="epsilon",
            timestep_type="discrete",
            timestep_spacing="linspace",
            interpolation_type="linear"
        )

        generation_config = ImageGenerationConfig(
            prompts=[args.prompt],
            sampler_config=sampler_config,
            scheduler_config=scheduler_config,
            strength=1.0,
            negative_prompts=[args.negative_prompt] if args.negative_prompt else None,
            height=args.height,
            width=args.width
        )

        # Create text-to-image input
        txt2img_input = ImageGenTxt2ImgInput(
            prompt=args.prompt,
            config=generation_config,
            output_path=str(output_path)
        )

        if verbose:
            print("Generating image...")

        # Generate image
        result = image_gen.txt2img(txt2img_input)

        if verbose:
            print("Image generated successfully!")
            print(f"Saved to: {result.output_image_path}")
        else:
            print(f"Image saved to: {result.output_image_path}")

    except KeyboardInterrupt:
        print("\nGeneration cancelled by user.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        if verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
    finally:
        # The generator interface exposes destroy(), not close(). Doing the
        # cleanup here releases the generator on the error paths as well.
        if image_gen is not None:
            image_gen.destroy()


if __name__ == "__main__":
    main()