nexaai-1.0.19rc7-cp310-cp310-macosx_14_0_universal2.whl → nexaai-1.0.19rc9-cp310-cp310-macosx_14_0_universal2.whl

This diff compares the content of two publicly released versions of the package. The information is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nexaai has been flagged as possibly problematic.

Files changed (200)
  1. nexaai/_stub.cpython-310-darwin.so +0 -0
  2. nexaai/_version.py +1 -1
  3. nexaai/binds/libnexa_bridge.dylib +0 -0
  4. nexaai/mlx_backend/vlm/generate_qwen3_vl.py +14 -31
  5. nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +15 -32
  6. nexaai/mlx_backend/vlm/modeling/models/qwen3_vl/qwen3vl.py +7 -23
  7. nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +8 -24
  8. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/METADATA +1 -1
  9. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/RECORD +11 -200
  10. nexaai/binds/nexa_mlx/py-lib/asr/__init__.py +0 -12
  11. nexaai/binds/nexa_mlx/py-lib/asr/interface.py +0 -122
  12. nexaai/binds/nexa_mlx/py-lib/common/__init__.py +0 -0
  13. nexaai/binds/nexa_mlx/py-lib/common/utils.py +0 -25
  14. nexaai/binds/nexa_mlx/py-lib/cv/__init__.py +0 -0
  15. nexaai/binds/nexa_mlx/py-lib/cv/generate.py +0 -195
  16. nexaai/binds/nexa_mlx/py-lib/cv/interface.py +0 -151
  17. nexaai/binds/nexa_mlx/py-lib/cv/main.py +0 -81
  18. nexaai/binds/nexa_mlx/py-lib/cv/modeling/pp_ocr_v4.py +0 -1736
  19. nexaai/binds/nexa_mlx/py-lib/embedding/__init__.py +0 -0
  20. nexaai/binds/nexa_mlx/py-lib/embedding/generate.py +0 -333
  21. nexaai/binds/nexa_mlx/py-lib/embedding/interface.py +0 -617
  22. nexaai/binds/nexa_mlx/py-lib/embedding/main.py +0 -173
  23. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/__init__.py +0 -0
  24. nexaai/binds/nexa_mlx/py-lib/embedding/modeling/nexa_jina_v2.py +0 -399
  25. nexaai/binds/nexa_mlx/py-lib/image_gen/__init__.py +0 -1
  26. nexaai/binds/nexa_mlx/py-lib/image_gen/generate_sd.py +0 -244
  27. nexaai/binds/nexa_mlx/py-lib/image_gen/interface.py +0 -82
  28. nexaai/binds/nexa_mlx/py-lib/image_gen/main.py +0 -281
  29. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/__init__.py +0 -306
  30. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/clip.py +0 -116
  31. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/config.py +0 -65
  32. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/model_io.py +0 -386
  33. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/sampler.py +0 -105
  34. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/tokenizer.py +0 -100
  35. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/unet.py +0 -460
  36. nexaai/binds/nexa_mlx/py-lib/image_gen/stable_diffusion/vae.py +0 -274
  37. nexaai/binds/nexa_mlx/py-lib/llm/__init__.py +0 -0
  38. nexaai/binds/nexa_mlx/py-lib/llm/generate.py +0 -149
  39. nexaai/binds/nexa_mlx/py-lib/llm/interface.py +0 -764
  40. nexaai/binds/nexa_mlx/py-lib/llm/main.py +0 -68
  41. nexaai/binds/nexa_mlx/py-lib/rerank/__init__.py +0 -0
  42. nexaai/binds/nexa_mlx/py-lib/rerank/generate.py +0 -174
  43. nexaai/binds/nexa_mlx/py-lib/rerank/interface.py +0 -287
  44. nexaai/binds/nexa_mlx/py-lib/rerank/main.py +0 -127
  45. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/__init__.py +0 -0
  46. nexaai/binds/nexa_mlx/py-lib/rerank/modeling/nexa_jina_rerank.py +0 -330
  47. nexaai/binds/nexa_mlx/py-lib/sd/__init__.py +0 -1
  48. nexaai/binds/nexa_mlx/py-lib/sd/interface.py +0 -362
  49. nexaai/binds/nexa_mlx/py-lib/sd/main.py +0 -286
  50. nexaai/binds/nexa_mlx/py-lib/sd/modeling/__init__.py +0 -306
  51. nexaai/binds/nexa_mlx/py-lib/sd/modeling/clip.py +0 -116
  52. nexaai/binds/nexa_mlx/py-lib/sd/modeling/config.py +0 -65
  53. nexaai/binds/nexa_mlx/py-lib/sd/modeling/model_io.py +0 -385
  54. nexaai/binds/nexa_mlx/py-lib/sd/modeling/sampler.py +0 -105
  55. nexaai/binds/nexa_mlx/py-lib/sd/modeling/tokenizer.py +0 -100
  56. nexaai/binds/nexa_mlx/py-lib/sd/modeling/unet.py +0 -460
  57. nexaai/binds/nexa_mlx/py-lib/sd/modeling/vae.py +0 -274
  58. nexaai/binds/nexa_mlx/py-lib/tts/__init__.py +0 -12
  59. nexaai/binds/nexa_mlx/py-lib/tts/interface.py +0 -276
  60. nexaai/binds/nexa_mlx/py-lib/vlm/__init__.py +0 -3
  61. nexaai/binds/nexa_mlx/py-lib/vlm/generate.py +0 -572
  62. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl.py +0 -294
  63. nexaai/binds/nexa_mlx/py-lib/vlm/generate_qwen3_vl_moe.py +0 -276
  64. nexaai/binds/nexa_mlx/py-lib/vlm/interface.py +0 -504
  65. nexaai/binds/nexa_mlx/py-lib/vlm/main.py +0 -320
  66. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/__init__.py +0 -0
  67. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/convert.py +0 -68
  68. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/__init__.py +0 -0
  69. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/__init__.py +0 -8
  70. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/aya_vision.py +0 -193
  71. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/interpolate.py +0 -186
  72. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/language.py +0 -233
  73. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/aya_vision/vision.py +0 -503
  74. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/base.py +0 -202
  75. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/cache.py +0 -230
  76. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/__init__.py +0 -10
  77. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/conversation.py +0 -264
  78. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/deepseek_vl_v2.py +0 -472
  79. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/language.py +0 -591
  80. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/processing_deepsek_vl_v2.py +0 -526
  81. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/deepseek_vl_v2/vision.py +0 -356
  82. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/__init__.py +0 -8
  83. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/florence2.py +0 -366
  84. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/language.py +0 -488
  85. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/florence2/vision.py +0 -591
  86. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/__init__.py +0 -8
  87. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/gemma3.py +0 -213
  88. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/language.py +0 -315
  89. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3/vision.py +0 -238
  90. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/__init__.py +0 -2
  91. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/audio.py +0 -1038
  92. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/config.py +0 -139
  93. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/gemma3n.py +0 -322
  94. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/language.py +0 -629
  95. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/gemma3n/vision.py +0 -1022
  96. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/__init__.py +0 -9
  97. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/idefics2.py +0 -294
  98. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/language.py +0 -191
  99. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics2/vision.py +0 -267
  100. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/__init__.py +0 -8
  101. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/idefics3.py +0 -175
  102. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/language.py +0 -192
  103. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/idefics3/vision.py +0 -233
  104. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/__init__.py +0 -9
  105. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/internvl_chat.py +0 -140
  106. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/language.py +0 -220
  107. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/processor.py +0 -393
  108. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/internvl_chat/vision.py +0 -293
  109. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kernels.py +0 -307
  110. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/__init__.py +0 -8
  111. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/kimi_vl.py +0 -143
  112. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/language.py +0 -509
  113. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/kimi_vl/vision.py +0 -522
  114. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/__init__.py +0 -8
  115. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/language.py +0 -386
  116. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/llama4.py +0 -138
  117. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llama4/vision.py +0 -560
  118. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/__init__.py +0 -8
  119. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/language.py +0 -240
  120. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/llava.py +0 -153
  121. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava/vision.py +0 -259
  122. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/__init__.py +0 -9
  123. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/language.py +0 -236
  124. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/llava_bunny.py +0 -256
  125. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_bunny/vision.py +0 -303
  126. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/__init__.py +0 -8
  127. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/language.py +0 -230
  128. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/llava_next.py +0 -160
  129. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/llava_next/vision.py +0 -243
  130. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/__init__.py +0 -8
  131. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mistral3/mistral3.py +0 -283
  132. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/__init__.py +0 -8
  133. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/language.py +0 -416
  134. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/mllama.py +0 -172
  135. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/mllama/vision.py +0 -499
  136. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/__init__.py +0 -8
  137. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/language.py +0 -243
  138. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/molmo.py +0 -133
  139. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/molmo/vision.py +0 -465
  140. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/__init__.py +0 -10
  141. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/language.py +0 -230
  142. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/multi_modality.py +0 -385
  143. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/sam.py +0 -557
  144. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/multi_modality/vision.py +0 -526
  145. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/__init__.py +0 -8
  146. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/language.py +0 -282
  147. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/paligemma.py +0 -160
  148. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/paligemma/vision.py +0 -242
  149. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/__init__.py +0 -8
  150. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/language.py +0 -21
  151. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/phi3_v.py +0 -243
  152. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/su_rope.py +0 -71
  153. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/phi3_v/vision.py +0 -324
  154. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/__init__.py +0 -8
  155. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/language.py +0 -229
  156. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/pixtral.py +0 -161
  157. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/pixtral/vision.py +0 -320
  158. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/__init__.py +0 -2
  159. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/config.py +0 -108
  160. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/language.py +0 -490
  161. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/qwen2_5_vl.py +0 -168
  162. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_5_vl/vision.py +0 -414
  163. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/__init__.py +0 -2
  164. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/config.py +0 -104
  165. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/language.py +0 -490
  166. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/qwen2_vl.py +0 -167
  167. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen2_vl/vision.py +0 -312
  168. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/__init__.py +0 -0
  169. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/base.py +0 -117
  170. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/cache.py +0 -531
  171. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/generate.py +0 -701
  172. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/rope_utils.py +0 -255
  173. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/sample_utils.py +0 -303
  174. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/llm_common/tokenizer_utils.py +0 -407
  175. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/processor.py +0 -476
  176. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3_vl/qwen3vl.py +0 -1223
  177. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
  178. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +0 -117
  179. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +0 -531
  180. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +0 -701
  181. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +0 -255
  182. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +0 -303
  183. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +0 -407
  184. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/processor.py +0 -476
  185. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +0 -1309
  186. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/qwen3vl_moe/switch_layers.py +0 -210
  187. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/__init__.py +0 -8
  188. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/models/smolvlm/smolvlm.py +0 -62
  189. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_5_vl.py +0 -209
  190. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/processing_qwen2_vl.py +0 -215
  191. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/prompt_utils.py +0 -474
  192. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/sample_utils.py +0 -39
  193. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/tokenizer_utils.py +0 -344
  194. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/__init__.py +0 -9
  195. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/lora.py +0 -70
  196. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/trainer.py +0 -296
  197. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/trainer/utils.py +0 -160
  198. nexaai/binds/nexa_mlx/py-lib/vlm/modeling/utils.py +0 -928
  199. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/WHEEL +0 -0
  200. {nexaai-1.0.19rc7.dist-info → nexaai-1.0.19rc9.dist-info}/top_level.txt +0 -0
nexaai/binds/nexa_mlx/py-lib/asr/interface.py
@@ -1,122 +0,0 @@
- from typing import Any, List, Optional, Sequence
- import argparse
- import sys
- import os
-
- import mlx.core as mx
- import numpy as np
-
- from ml import ASR, ASRConfig, ASRResult, Path as MLPath
- from mlx_audio.stt.utils import load_model
- from mlx_audio.stt.models.whisper.tokenizer import LANGUAGES
- from mlx_audio.stt.models.whisper.whisper import Model
- import soundfile as sf
- import scipy.signal
-
- from profiling import ProfilingMixin, StopReason
-
-
- class MlxAsr(ASR, ProfilingMixin):
-     """MLX Audio implementation of ASR interface."""
-
-     def __init__(
-         self,
-         model_path: MLPath,
-         tokenizer_path: Optional[MLPath],
-         language: Optional[str],
-         device: Optional[str] = None,
-     ) -> None:
-         # Initialize profiling mixin
-         ProfilingMixin.__init__(self)
-
-         if os.path.isfile(model_path):
-             model_path = os.path.dirname(model_path)
-
-         super().__init__(model_path, tokenizer_path, language, device)
-
-         # Load model immediately in constructor
-         self.model: Model = load_model(model_path)
-         self.model_path = model_path
-
-     def destroy(self) -> None:
-         """Destroy the model and free resources."""
-         if self.model is not None:
-             del self.model
-             self.model = None
-             mx.clear_cache()
-
-     def close(self) -> None:
-         """Close the model."""
-         self.destroy()
-
-     def transcribe(
-         self,
-         audio_path: MLPath,
-         language: Optional[str] = None,
-         config: Optional[ASRConfig] = None,
-         clear_cache: bool = True,
-     ) -> ASRResult:
-         """Transcribe audio file to text."""
-         if self.model is None:
-             raise RuntimeError("Model not loaded")
-
-         # Start profiling
-         self._start_profiling()
-         self._decode_start()
-
-         try:
-             result = self.model.generate(audio_path)
-
-             if clear_cache:
-                 mx.clear_cache()
-
-             self._decode_end()
-             self._set_stop_reason(StopReason.ML_STOP_REASON_COMPLETED)
-             self._end_profiling()
-         except Exception as e:
-             self._end_profiling()
-             raise RuntimeError(f"Failed to transcribe audio file {audio_path}: {e}")
-
-         # Extract confidence scores and timestamps
-         confidence_scores = []
-         timestamps = []
-
-         # Handle different result types: Whisper (STTOutput) vs Parakeet (AlignedResult)
-         if hasattr(result, 'segments') and result.segments:
-             # Whisper STTOutput format
-             for segment in result.segments:
-                 if 'avg_logprob' in segment:
-                     # Convert log probability to confidence score (0-1)
-                     confidence = max(0.0, min(1.0, np.exp(segment['avg_logprob'])))
-                     confidence_scores.append(confidence)
-                 else:
-                     confidence_scores.append(0.5)  # Default confidence
-
-                 start_time = segment.get('start', 0.0)
-                 end_time = segment.get('end', 0.0)
-                 timestamps.append((start_time, end_time))
-         elif hasattr(result, 'sentences') and result.sentences:
-             # Parakeet AlignedResult format
-             for sentence in result.sentences:
-                 confidence_scores.append(0.5)  # Default confidence for Parakeet
-                 timestamps.append((sentence.start, sentence.end))
-         else:
-             # Single segment case or empty result
-             confidence_scores.append(0.5)
-             timestamps.append((0.0, 0.0))  # Default timestamps
-
-         return ASRResult(
-             transcript=result.text,
-             confidence_scores=confidence_scores,
-             timestamps=timestamps,
-             duration_us=self._get_audio_duration_us(audio_path)
-         )
-
-     def list_supported_languages(self) -> List[str]:
-         """List supported languages."""
-         return list(LANGUAGES.keys())
-
-     def _get_audio_duration_us(self, audio_path: MLPath) -> int:
-         with sf.SoundFile(audio_path) as f:
-             duration_us = f.frames / f.samplerate * 1e6
-             return int(duration_us)
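Note: the removed ASR interface converts Whisper's per-segment avg_logprob into a [0, 1] confidence by exponentiating and clamping. A minimal standalone check of that mapping (the sample log-probability below is made up):

    import numpy as np

    # exp() turns an average log-probability back into a probability;
    # the clamp guards against values spilling above 1.0.
    avg_logprob = -0.105  # hypothetical Whisper segment score
    confidence = max(0.0, min(1.0, np.exp(avg_logprob)))
    print(f"{confidence:.3f}")  # -> 0.900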
nexaai/binds/nexa_mlx/py-lib/common/__init__.py
File without changes
nexaai/binds/nexa_mlx/py-lib/common/utils.py
@@ -1,25 +0,0 @@
- import atexit
-
- # Store the original atexit.register function
- _original_atexit_register = atexit.register
-
- def _filtered_atexit_register(func, *args, **kwargs):
-     """
-     Clean atexit interceptor that skips nanobind handlers to prevent segfaults due to MLX atexit cleanups.
-     This should be registered early during Python runtime initialization.
-     """
-     # Skip nanobind handlers silently
-     func_type_str = str(type(func))
-     if 'nanobind' in func_type_str or func_type_str.startswith("<class 'nb_"):
-         return lambda: None
-
-     # Allow all other handlers to register normally
-     return _original_atexit_register(func, *args, **kwargs)
-
- def install_atexit_filter():
-     """Install the atexit filter to prevent problematic nanobind registrations."""
-     atexit.register = _filtered_atexit_register
-
- def uninstall_atexit_filter():
-     """Restore the original atexit.register function."""
-     atexit.register = _original_atexit_register
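Note: this removed module monkey-patches atexit.register so that nanobind cleanup handlers are silently dropped, working around segfaults during MLX's atexit teardown. A sketch of how the filter is presumably meant to be used (the import path assumes the bundled py-lib layout):

    from common.utils import install_atexit_filter, uninstall_atexit_filter

    install_atexit_filter()    # nanobind atexit hooks registered after this are discarded
    import mlx.core as mx      # extension import that may register such hooks
    # ... run MLX workloads ...
    uninstall_atexit_filter()  # restore the stock atexit.register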
nexaai/binds/nexa_mlx/py-lib/cv/__init__.py
File without changes
nexaai/binds/nexa_mlx/py-lib/cv/generate.py
@@ -1,195 +0,0 @@
- #!/usr/bin/env python3
-
- import os
- import sys
- import time
- import math
- from pathlib import Path
-
- import cv2
- import numpy as np
- from PIL import Image, ImageDraw, ImageFont
-
- from .modeling.pp_ocr_v4 import Config, TextSystem
-
-
- def is_image_file(file_path):
-     """Check if file is an image based on extension."""
-     img_extensions = {".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".rgb"}
-     return Path(file_path).suffix.lower() in img_extensions
-
-
- def get_image_file_list(img_file):
-     """Get list of image files from a directory or single file."""
-     imgs_lists = []
-     if img_file is None or not os.path.exists(img_file):
-         raise Exception("not found any img file in {}".format(img_file))
-
-     if os.path.isfile(img_file) and is_image_file(img_file):
-         imgs_lists.append(img_file)
-     elif os.path.isdir(img_file):
-         for single_file in os.listdir(img_file):
-             file_path = os.path.join(img_file, single_file)
-             if is_image_file(file_path):
-                 imgs_lists.append(file_path)
-     if len(imgs_lists) == 0:
-         raise Exception("not found any img file in {}".format(img_file))
-     return imgs_lists
-
-
- def check_and_read_gif(img_path):
-     """Check if image is gif and read it properly."""
-     if os.path.basename(img_path)[-3:] in ["gif", "GIF"]:
-         gif = cv2.VideoCapture(img_path)
-         ret, frame = gif.read()
-         if not ret:
-             print("Cannot read {}. This gif image maybe corrupted.".format(img_path))
-             return None, False
-         if len(frame.shape) == 2 or frame.shape[-1] == 1:
-             frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-         imgvalue = frame[:, :, ::-1]
-         return imgvalue, True
-     return None, False
-
-
- def draw_ocr_box_txt(
-     image, boxes, txts, scores=None, drop_score=0.5, font_path="./doc/simfang.ttf"
- ):
-     """Draw OCR results with boxes and text."""
-     h, w = image.height, image.width
-     img_left = image.copy()
-     img_right = Image.new("RGB", (w, h), (255, 255, 255))
-
-     import random
-     random.seed(0)
-
-     draw_left = ImageDraw.Draw(img_left)
-     draw_right = ImageDraw.Draw(img_right)
-
-     for idx, (box, txt) in enumerate(zip(boxes, txts)):
-         if scores is not None and scores[idx] < drop_score:
-             continue
-
-         color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
-         draw_left.polygon(box, fill=color)
-         draw_right.polygon(
-             [
-                 box[0][0],
-                 box[0][1],
-                 box[1][0],
-                 box[1][1],
-                 box[2][0],
-                 box[2][1],
-                 box[3][0],
-                 box[3][1],
-             ],
-             outline=color,
-         )
-
-         box_height = math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
-         box_width = math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
-
-         if box_height > 2 * box_width:
-             font_size = max(int(box_width * 0.9), 10)
-             try:
-                 font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-             except:
-                 font = ImageFont.load_default()
-             cur_y = box[0][1]
-             for c in txt:
-                 try:
-                     bbox = font.getbbox(c)
-                     char_size = (bbox[2] - bbox[0], bbox[3] - bbox[1])
-                 except:
-                     char_size = (font_size, font_size)
-                 draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
-                 cur_y += char_size[1]
-         else:
-             font_size = max(int(box_height * 0.8), 10)
-             try:
-                 font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-             except:
-                 font = ImageFont.load_default()
-             draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
-
-     img_left = Image.blend(image, img_left, 0.5)
-     img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
-     img_show.paste(img_left, (0, 0, w, h))
-     img_show.paste(img_right, (w, 0, w * 2, h))
-
-     return np.array(img_show)
-
-
- def load_model():
-     """Load OCR model and return config and text system."""
-     config = Config()
-     ocr_system = TextSystem(config)
-     return config, ocr_system
-
-
- def process_folder(config, ocr_system):
-     """Process all images in the configured folder."""
-     img_paths = get_image_file_list(config.image_dir)
-     if not img_paths:
-         print("[ERR] No images found in", config.image_dir)
-         return
-
-     out_root = Path(config.base_dir) / "output"
-     txt_dir = out_root / "inference_txt"
-     vis_dir = out_root / "inference_results"
-     txt_dir.mkdir(parents=True, exist_ok=True)
-     vis_dir.mkdir(parents=True, exist_ok=True)
-
-     font = config.vis_font_path
-
-     total = 0.0
-     for idx, p in enumerate(img_paths, 1):
-         img, is_gif = check_and_read_gif(p)
-         if not is_gif:
-             img = cv2.imread(p)
-         if img is None:
-             print(f"[WARN] skip {p}")
-             continue
-
-         t0 = time.time()
-         boxes, recs = ocr_system(img)
-         dt = time.time() - t0
-         total += dt
-
-         name = Path(p).stem
-
-         with open(txt_dir / f"{name}.txt", "w", encoding="utf-8") as f:
-             f.writelines(f"{txt}\n" for txt, sc in recs)  # DO NOT write confidence score in txt file
-
-         vis = draw_ocr_box_txt(
-             Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
-             boxes,
-             [t for t, _ in recs],
-             [s for _, s in recs],
-             drop_score=config.drop_score,
-             font_path=font,
-         )
-         cv2.imwrite(str(vis_dir / f"{name}.jpg"), vis[:, :, ::-1])
-
-         print(f"[{idx}/{len(img_paths)}] {Path(p).name} boxes={len(boxes)} time={dt:.3f}s")
-
-     print(f"\nDone {len(img_paths)} images in {total:.2f}s (avg {total/len(img_paths):.3f}s)")
-
-
- def main():
-     """Main function to demonstrate OCR functionality."""
-     print("📥 Loading OCR model...")
-
-     # Load model and config
-     config, ocr_system = load_model()
-
-     print("✅ OCR model loaded successfully!")
-     print(f"📂 Processing images from: {config.image_dir}")
-     print("="*50)
-
-     # Process images
-     process_folder(config, ocr_system)
-
-
- if __name__ == "__main__":
-     main()
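Note: in draw_ocr_box_txt above, a box taller than twice its width is rendered as vertical text, character by character; otherwise the whole string is drawn horizontally. A worked example of the heuristic with a hypothetical 20 × 100 px box:

    import math

    box = [(0, 0), (20, 0), (20, 100), (0, 100)]  # hypothetical quad: 20 px wide, 100 px tall
    box_height = math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)  # 100.0
    box_width = math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)   # 20.0

    if box_height > 2 * box_width:                 # 100 > 40 -> vertical layout
        font_size = max(int(box_width * 0.9), 10)  # 18
    else:
        font_size = max(int(box_height * 0.8), 10)
    print("vertical" if box_height > 2 * box_width else "horizontal", font_size)  # vertical 18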
nexaai/binds/nexa_mlx/py-lib/cv/interface.py
@@ -1,151 +0,0 @@
- # Copyright © Nexa AI
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from __future__ import annotations
-
- import os
- import json
- import time
- import cv2
- import numpy as np
- from pathlib import Path
- from typing import Any, List, Optional, Sequence, Tuple, Union
- from PIL import Image
- from dataclasses import dataclass
-
- # Import necessary modules
- import mlx.core as mx
-
- # Import from ml.py for API alignment
- from ml import (
-     CVModel as BaseCVModel,
-     CVModelConfig,
-     CVResults,
-     CVResult,
-     CVCapabilities,
-     Path as PathType,
- )
-
- # Import the model implementation
- from .modeling.pp_ocr_v4 import Config, TextSystem
-
- @dataclass
- class CVConfig:
-     """Configuration for CV processing."""
-     batch_size: int = 1
-     drop_score: float = 0.5
-     font_path: Optional[str] = None
-
-     def __init__(
-         self,
-         batch_size: int = 1,
-         drop_score: float = 0.5,
-         font_path: Optional[str] = None,
-     ) -> None:
-         self.batch_size = batch_size
-         self.drop_score = drop_score
-         self.font_path = font_path
-
-
- class CVModel(BaseCVModel):
-     """
-     CV Model interface for MLX OCR models.
-     API aligned with ml.py CVModel abstract base class.
-     """
-     def __init__(
-         self,
-         config: CVModelConfig,
-         device: Optional[str] = None,
-     ) -> None:
-         super().__init__(config, device)
-         # print(f"config: {config}")
-         # TODO: this hack is to support local model path
-         # hack only support pp_ocr_v4
-         rec_model_path_str = str(config.rec_model_path) if config.rec_model_path else None
-         model_cache_dir = os.path.dirname(rec_model_path_str) if rec_model_path_str else None
-         # print(f"model_cache_dir: {model_cache_dir}")
-         cfg = Config(model_cache_dir)
-         # print(f"cfg: {cfg}")
-         cfg.device = self.device
-         self.ocr_system = TextSystem(cfg)
-
-     def destroy(self) -> None:
-         """Destroy the model and free resources."""
-         self.ocr_system = None
-         self.config = None
-
-     def close(self) -> None:
-         """Close the model."""
-         self.destroy()
-
-     def infer(self, input_image_path: str, clear_cache: bool = True) -> CVResults:
-         """Perform inference on image."""
-         if self.ocr_system is None:
-             raise RuntimeError("Model not loaded. Call load_model() first.")
-
-         # Load image
-         img = self._load_image(input_image_path)
-         if img is None:
-             raise ValueError(f"Failed to load image: {input_image_path}")
-
-         # Process with OCR
-         boxes, recs = self.ocr_system(img)
-
-         if clear_cache:
-             mx.clear_cache()
-
-         # Convert to CVResults format
-         results = []
-         for box, (text, score) in zip(boxes, recs):
-             # Create CVResult
-             result = CVResult(
-                 text=text,
-                 confidence=score,
-                 # Note: OCR doesn't use bounding boxes in the same way as detection models
-                 # but we can store the box coordinates if needed
-             )
-             results.append(result)
-
-         return CVResults(results=results, result_count=len(results))
-
-
-     def _load_image(self, image_path: Union[str, PathType]) -> Optional[np.ndarray]:
-         """Load image from path."""
-         try:
-             # Check if it's a GIF
-             if str(image_path).lower().endswith('.gif'):
-                 gif = cv2.VideoCapture(str(image_path))
-                 ret, frame = gif.read()
-                 if not ret:
-                     return None
-                 if len(frame.shape) == 2 or frame.shape[-1] == 1:
-                     frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-                 return frame[:, :, ::-1]  # BGR to RGB
-             else:
-                 img = cv2.imread(str(image_path))
-                 if img is None:
-                     return None
-                 return img
-         except Exception as e:
-             print(f"Error loading image {image_path}: {e}")
-             return None
-
-
-
- def create_cv_model(
-     config: CVModelConfig,
-     device: Optional[str] = None,
- ) -> CVModel:
-     """Create a CV model instance."""
-     return CVModel(config, device)
nexaai/binds/nexa_mlx/py-lib/cv/main.py
@@ -1,81 +0,0 @@
- # Copyright © Nexa AI
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import os
-
- from .interface import create_cv_model, CVModelConfig
-
-
- def test_cv_model(model_path, test_image_path):
-     """Test CV model functionality."""
-
-     # Create CVModelConfig
-     config = CVModelConfig(
-         capabilities=0,  # ML_CV_OCR
-         model_path=model_path,
-         system_library_path=None,
-         backend_library_path=None,
-         extension_library_path=None,
-         config_file_path=None,
-         char_dict_path=None
-     )
-
-     model = create_cv_model(config)
-     print("✅ Model loaded successfully!")
-
-     # Test images (you can replace these with actual image paths)
-     test_images = [
-         "cv/modeling/input/20250406-170821.jpeg",
-         "cv/modeling/input/20250406-170838.jpeg",
-         "cv/modeling/input/20250406-170906.jpeg",
-         "cv/modeling/input/20250407-154044.jpeg",
-         "cv/modeling/input/20250407-154059.jpeg"
-     ] if test_image_path is None else [test_image_path]
-
-     for img_path in test_images:
-         if not os.path.exists(img_path):
-             print(f"❌ Image file not found: {img_path}")
-             continue
-
-         results = model.infer(img_path)
-         print(f"✅ OCR Results for {img_path}:")
-         print("=" * 50)
-
-         if results.result_count == 0:
-             print("No text detected in the image.")
-         else:
-             print(f"Found {results.result_count} text regions:")
-
-             for i, result in enumerate(results.results):
-                 print(f"\nRegion {i+1}:")
-                 print(f"  Text: '{result.text}'")
-                 print(f"  Confidence: {result.confidence:.3f}")
-
-     print("\n✅ CV model test completed!")
-
-
- if __name__ == "__main__":
-     import argparse
-     parser = argparse.ArgumentParser(description="Test CV processor functionality")
-     parser.add_argument("--model_path", type=str, default="nexaml/paddle-ocr-mlx",
-                         help="Path to the CV model")
-     parser.add_argument("--image_path", type=str, default=None,
-                         help="Path to a specific image to process")
-     parser.add_argument("--test_mode", action="store_true",
-                         help="Run in test mode with sample images")
-
-     args = parser.parse_args()
-
-     test_cv_model(args.model_path, args.image_path)
-