mlx 0.30.7.2 → 0.30.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (605)
  1. checksums.yaml +4 -4
  2. data/ext/mlx/extconf.rb +267 -8
  3. data/ext/mlx/native.cpp +112 -58
  4. data/ext/mlx-onnx/native.cpp +1402 -0
  5. data/ext/mlx-onnx/native.hpp +19 -0
  6. data/lib/mlx/core.rb +342 -117
  7. data/lib/mlx/distributed_utils/common.rb +1 -1
  8. data/lib/mlx/distributed_utils/config.rb +7 -4
  9. data/lib/mlx/distributed_utils/launch.rb +2 -0
  10. data/lib/mlx/dsl/attention.rb +132 -0
  11. data/lib/mlx/dsl/builder.rb +8 -0
  12. data/lib/mlx/dsl/config_schema.rb +133 -0
  13. data/lib/mlx/dsl/generate.rb +193 -0
  14. data/lib/mlx/dsl/kv_cache.rb +96 -0
  15. data/lib/mlx/dsl/masks.rb +32 -0
  16. data/lib/mlx/dsl/positions.rb +35 -0
  17. data/lib/mlx/dsl/run_stack.rb +68 -0
  18. data/lib/mlx/dsl/tensor.rb +126 -0
  19. data/lib/mlx/dsl/transformer_block.rb +113 -0
  20. data/lib/mlx/dsl/weight_map.rb +140 -0
  21. data/lib/mlx/dsl.rb +10 -0
  22. data/lib/mlx/nn/base.rb +4 -0
  23. data/lib/mlx/nn/layers/linear.rb +2 -3
  24. data/lib/mlx/onnx.rb +250 -0
  25. data/lib/mlx/version.rb +1 -1
  26. data/lib/mlx-onnx/webgpu_harness.rb +289 -0
  27. data/{mlx → submodules/mlx}/mlx/backend/cuda/cublas_utils.cpp +0 -7
  28. data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/cublas_gemm.cpp +10 -2
  29. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/cublas_qqmm.cpp +97 -46
  30. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/cublas_qqmm.h +25 -13
  31. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/fp_quantize.cu +101 -38
  32. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/no_qqmm_impl.cpp +1 -2
  33. data/submodules/mlx/mlx/backend/cuda/quantized/qqmm.cpp +193 -0
  34. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/qqmm_impl.cpp +15 -8
  35. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/qqmm_impl.h +14 -3
  36. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/qqmm_utils.cu +36 -0
  37. data/submodules/mlx/mlx/backend/cuda/quantized/qqmm_utils.h +62 -0
  38. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/quantized.cpp +12 -3
  39. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/quantized.h +4 -0
  40. data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/quantized_utils.cuh +1 -1
  41. data/{mlx → submodules/mlx}/mlx/backend/metal/device.cpp +4 -0
  42. data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/conv.metal +3 -2
  43. data/{mlx → submodules/mlx}/mlx/export.cpp +21 -6
  44. data/{mlx → submodules/mlx}/mlx/ops.cpp +144 -13
  45. data/{mlx → submodules/mlx}/mlx/ops.h +12 -2
  46. data/{mlx → submodules/mlx}/mlx/primitives.cpp +22 -5
  47. data/{mlx → submodules/mlx}/mlx/scheduler.cpp +4 -0
  48. data/{mlx → submodules/mlx}/mlx/scheduler.h +3 -0
  49. data/{mlx → submodules/mlx}/mlx/stream.h +5 -0
  50. data/submodules/mlx-onnx/CMakeLists.txt +159 -0
  51. data/submodules/mlx-onnx/LICENSE +21 -0
  52. data/submodules/mlx-onnx/include/mlx/ir.hpp +88 -0
  53. data/submodules/mlx-onnx/src/api.cpp +81 -0
  54. data/submodules/mlx-onnx/src/compat.cpp +111 -0
  55. data/submodules/mlx-onnx/src/detail.hpp +69 -0
  56. data/submodules/mlx-onnx/src/export.cpp +653 -0
  57. data/submodules/mlx-onnx/src/io.cpp +61 -0
  58. data/submodules/mlx-onnx/src/json.hpp +25 -0
  59. data/submodules/mlx-onnx/src/lowering.cpp +6346 -0
  60. data/submodules/mlx-onnx/src/mappings.cpp +201 -0
  61. data/submodules/mlx-onnx/src/mappings.hpp +16 -0
  62. data/submodules/mlx-onnx/src/onnx.cpp +1029 -0
  63. data/submodules/mlx-onnx/src/shared.cpp +206 -0
  64. metadata +665 -567
  65. data/mlx/mlx/backend/cuda/quantized/qqmm.cpp +0 -158
  66. data/mlx/mlx/backend/cuda/quantized/qqmm_utils.h +0 -30
  67. /data/{mlx → submodules/mlx}/CMakeLists.txt +0 -0
  68. /data/{mlx → submodules/mlx}/cmake/FindCUDNN.cmake +0 -0
  69. /data/{mlx → submodules/mlx}/cmake/FindNCCL.cmake +0 -0
  70. /data/{mlx → submodules/mlx}/cmake/Findnvpl.cmake +0 -0
  71. /data/{mlx → submodules/mlx}/cmake/extension.cmake +0 -0
  72. /data/{mlx → submodules/mlx}/mlx/3rdparty/.clang-format +0 -0
  73. /data/{mlx → submodules/mlx}/mlx/3rdparty/pocketfft.h +0 -0
  74. /data/{mlx → submodules/mlx}/mlx/CMakeLists.txt +0 -0
  75. /data/{mlx → submodules/mlx}/mlx/allocator.h +0 -0
  76. /data/{mlx → submodules/mlx}/mlx/api.h +0 -0
  77. /data/{mlx → submodules/mlx}/mlx/array.cpp +0 -0
  78. /data/{mlx → submodules/mlx}/mlx/array.h +0 -0
  79. /data/{mlx → submodules/mlx}/mlx/backend/common/CMakeLists.txt +0 -0
  80. /data/{mlx → submodules/mlx}/mlx/backend/common/binary.h +0 -0
  81. /data/{mlx → submodules/mlx}/mlx/backend/common/broadcasting.cpp +0 -0
  82. /data/{mlx → submodules/mlx}/mlx/backend/common/broadcasting.h +0 -0
  83. /data/{mlx → submodules/mlx}/mlx/backend/common/buffer_cache.h +0 -0
  84. /data/{mlx → submodules/mlx}/mlx/backend/common/common.cpp +0 -0
  85. /data/{mlx → submodules/mlx}/mlx/backend/common/compiled.cpp +0 -0
  86. /data/{mlx → submodules/mlx}/mlx/backend/common/compiled.h +0 -0
  87. /data/{mlx → submodules/mlx}/mlx/backend/common/copy.h +0 -0
  88. /data/{mlx → submodules/mlx}/mlx/backend/common/hadamard.h +0 -0
  89. /data/{mlx → submodules/mlx}/mlx/backend/common/load.cpp +0 -0
  90. /data/{mlx → submodules/mlx}/mlx/backend/common/matmul.h +0 -0
  91. /data/{mlx → submodules/mlx}/mlx/backend/common/reduce.cpp +0 -0
  92. /data/{mlx → submodules/mlx}/mlx/backend/common/reduce.h +0 -0
  93. /data/{mlx → submodules/mlx}/mlx/backend/common/slicing.cpp +0 -0
  94. /data/{mlx → submodules/mlx}/mlx/backend/common/slicing.h +0 -0
  95. /data/{mlx → submodules/mlx}/mlx/backend/common/ternary.h +0 -0
  96. /data/{mlx → submodules/mlx}/mlx/backend/common/unary.h +0 -0
  97. /data/{mlx → submodules/mlx}/mlx/backend/common/utils.cpp +0 -0
  98. /data/{mlx → submodules/mlx}/mlx/backend/common/utils.h +0 -0
  99. /data/{mlx → submodules/mlx}/mlx/backend/cpu/CMakeLists.txt +0 -0
  100. /data/{mlx → submodules/mlx}/mlx/backend/cpu/arange.h +0 -0
  101. /data/{mlx → submodules/mlx}/mlx/backend/cpu/arg_reduce.cpp +0 -0
  102. /data/{mlx → submodules/mlx}/mlx/backend/cpu/binary.cpp +0 -0
  103. /data/{mlx → submodules/mlx}/mlx/backend/cpu/binary.h +0 -0
  104. /data/{mlx → submodules/mlx}/mlx/backend/cpu/binary_ops.h +0 -0
  105. /data/{mlx → submodules/mlx}/mlx/backend/cpu/binary_two.h +0 -0
  106. /data/{mlx → submodules/mlx}/mlx/backend/cpu/cholesky.cpp +0 -0
  107. /data/{mlx → submodules/mlx}/mlx/backend/cpu/compiled.cpp +0 -0
  108. /data/{mlx → submodules/mlx}/mlx/backend/cpu/compiled_preamble.h +0 -0
  109. /data/{mlx → submodules/mlx}/mlx/backend/cpu/conv.cpp +0 -0
  110. /data/{mlx → submodules/mlx}/mlx/backend/cpu/copy.cpp +0 -0
  111. /data/{mlx → submodules/mlx}/mlx/backend/cpu/copy.h +0 -0
  112. /data/{mlx → submodules/mlx}/mlx/backend/cpu/device_info.cpp +0 -0
  113. /data/{mlx → submodules/mlx}/mlx/backend/cpu/device_info.h +0 -0
  114. /data/{mlx → submodules/mlx}/mlx/backend/cpu/distributed.cpp +0 -0
  115. /data/{mlx → submodules/mlx}/mlx/backend/cpu/eig.cpp +0 -0
  116. /data/{mlx → submodules/mlx}/mlx/backend/cpu/eigh.cpp +0 -0
  117. /data/{mlx → submodules/mlx}/mlx/backend/cpu/encoder.cpp +0 -0
  118. /data/{mlx → submodules/mlx}/mlx/backend/cpu/encoder.h +0 -0
  119. /data/{mlx → submodules/mlx}/mlx/backend/cpu/eval.cpp +0 -0
  120. /data/{mlx → submodules/mlx}/mlx/backend/cpu/eval.h +0 -0
  121. /data/{mlx → submodules/mlx}/mlx/backend/cpu/fft.cpp +0 -0
  122. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemm.h +0 -0
  123. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemms/bnns.cpp +0 -0
  124. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemms/cblas.cpp +0 -0
  125. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemms/simd_bf16.cpp +0 -0
  126. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemms/simd_fp16.cpp +0 -0
  127. /data/{mlx → submodules/mlx}/mlx/backend/cpu/gemms/simd_gemm.h +0 -0
  128. /data/{mlx → submodules/mlx}/mlx/backend/cpu/hadamard.cpp +0 -0
  129. /data/{mlx → submodules/mlx}/mlx/backend/cpu/indexing.cpp +0 -0
  130. /data/{mlx → submodules/mlx}/mlx/backend/cpu/inverse.cpp +0 -0
  131. /data/{mlx → submodules/mlx}/mlx/backend/cpu/jit_compiler.cpp +0 -0
  132. /data/{mlx → submodules/mlx}/mlx/backend/cpu/jit_compiler.h +0 -0
  133. /data/{mlx → submodules/mlx}/mlx/backend/cpu/lapack.h +0 -0
  134. /data/{mlx → submodules/mlx}/mlx/backend/cpu/logsumexp.cpp +0 -0
  135. /data/{mlx → submodules/mlx}/mlx/backend/cpu/luf.cpp +0 -0
  136. /data/{mlx → submodules/mlx}/mlx/backend/cpu/make_compiled_preamble.ps1 +0 -0
  137. /data/{mlx → submodules/mlx}/mlx/backend/cpu/make_compiled_preamble.sh +0 -0
  138. /data/{mlx → submodules/mlx}/mlx/backend/cpu/masked_mm.cpp +0 -0
  139. /data/{mlx → submodules/mlx}/mlx/backend/cpu/matmul.cpp +0 -0
  140. /data/{mlx → submodules/mlx}/mlx/backend/cpu/primitives.cpp +0 -0
  141. /data/{mlx → submodules/mlx}/mlx/backend/cpu/qrf.cpp +0 -0
  142. /data/{mlx → submodules/mlx}/mlx/backend/cpu/quantized.cpp +0 -0
  143. /data/{mlx → submodules/mlx}/mlx/backend/cpu/reduce.cpp +0 -0
  144. /data/{mlx → submodules/mlx}/mlx/backend/cpu/scan.cpp +0 -0
  145. /data/{mlx → submodules/mlx}/mlx/backend/cpu/select.cpp +0 -0
  146. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/accelerate_fp16_simd.h +0 -0
  147. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/accelerate_simd.h +0 -0
  148. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/base_simd.h +0 -0
  149. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/math.h +0 -0
  150. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/neon_fp16_simd.h +0 -0
  151. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/simd.h +0 -0
  152. /data/{mlx → submodules/mlx}/mlx/backend/cpu/simd/type.h +0 -0
  153. /data/{mlx → submodules/mlx}/mlx/backend/cpu/slicing.h +0 -0
  154. /data/{mlx → submodules/mlx}/mlx/backend/cpu/softmax.cpp +0 -0
  155. /data/{mlx → submodules/mlx}/mlx/backend/cpu/sort.cpp +0 -0
  156. /data/{mlx → submodules/mlx}/mlx/backend/cpu/svd.cpp +0 -0
  157. /data/{mlx → submodules/mlx}/mlx/backend/cpu/ternary.h +0 -0
  158. /data/{mlx → submodules/mlx}/mlx/backend/cpu/threefry.cpp +0 -0
  159. /data/{mlx → submodules/mlx}/mlx/backend/cpu/threefry.h +0 -0
  160. /data/{mlx → submodules/mlx}/mlx/backend/cpu/unary.cpp +0 -0
  161. /data/{mlx → submodules/mlx}/mlx/backend/cpu/unary.h +0 -0
  162. /data/{mlx → submodules/mlx}/mlx/backend/cpu/unary_ops.h +0 -0
  163. /data/{mlx → submodules/mlx}/mlx/backend/cuda/CMakeLists.txt +0 -0
  164. /data/{mlx → submodules/mlx}/mlx/backend/cuda/allocator.cpp +0 -0
  165. /data/{mlx → submodules/mlx}/mlx/backend/cuda/allocator.h +0 -0
  166. /data/{mlx → submodules/mlx}/mlx/backend/cuda/arange.cu +0 -0
  167. /data/{mlx → submodules/mlx}/mlx/backend/cuda/arg_reduce.cu +0 -0
  168. /data/{mlx → submodules/mlx}/mlx/backend/cuda/bin2h.cmake +0 -0
  169. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/CMakeLists.txt +0 -0
  170. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/add.cu +0 -0
  171. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/arctan2.cu +0 -0
  172. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/binary.cuh +0 -0
  173. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/bitwise_binary.cu +0 -0
  174. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/divide.cu +0 -0
  175. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/equal.cu +0 -0
  176. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/greater.cu +0 -0
  177. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/greater_equal.cu +0 -0
  178. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/less.cu +0 -0
  179. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/less_equal.cu +0 -0
  180. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/log_add_exp.cu +0 -0
  181. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/logical_and.cu +0 -0
  182. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/logical_or.cu +0 -0
  183. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/maximum.cu +0 -0
  184. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/minimum.cu +0 -0
  185. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/multiply.cu +0 -0
  186. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/not_equal.cu +0 -0
  187. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/power.cu +0 -0
  188. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/remainder.cu +0 -0
  189. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary/subtract.cu +0 -0
  190. /data/{mlx → submodules/mlx}/mlx/backend/cuda/binary_two.cu +0 -0
  191. /data/{mlx → submodules/mlx}/mlx/backend/cuda/compiled.cpp +0 -0
  192. /data/{mlx → submodules/mlx}/mlx/backend/cuda/conv/conv.h +0 -0
  193. /data/{mlx → submodules/mlx}/mlx/backend/cuda/conv/gemm_conv.cu +0 -0
  194. /data/{mlx → submodules/mlx}/mlx/backend/cuda/conv/gemm_grouped_conv.cu +0 -0
  195. /data/{mlx → submodules/mlx}/mlx/backend/cuda/conv.cpp +0 -0
  196. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy/copy.cuh +0 -0
  197. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy/copy_contiguous.cu +0 -0
  198. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy/copy_general.cu +0 -0
  199. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy/copy_general_dynamic.cu +0 -0
  200. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy/copy_general_input.cu +0 -0
  201. /data/{mlx → submodules/mlx}/mlx/backend/cuda/copy.cu +0 -0
  202. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cublas_utils.h +0 -0
  203. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cuda.h +0 -0
  204. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cuda_utils.h +0 -0
  205. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cudnn_utils.cpp +0 -0
  206. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cudnn_utils.h +0 -0
  207. /data/{mlx → submodules/mlx}/mlx/backend/cuda/custom_kernel.cpp +0 -0
  208. /data/{mlx → submodules/mlx}/mlx/backend/cuda/cutlass_utils.cuh +0 -0
  209. /data/{mlx → submodules/mlx}/mlx/backend/cuda/delayload.cpp +0 -0
  210. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/atomic_ops.cuh +0 -0
  211. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/binary_ops.cuh +0 -0
  212. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/cast_op.cuh +0 -0
  213. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/complex.cuh +0 -0
  214. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/config.h +0 -0
  215. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/fp16_math.cuh +0 -0
  216. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/gather.cuh +0 -0
  217. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/gather_axis.cuh +0 -0
  218. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/indexing.cuh +0 -0
  219. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/scatter.cuh +0 -0
  220. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/scatter_axis.cuh +0 -0
  221. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/scatter_ops.cuh +0 -0
  222. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/ternary_ops.cuh +0 -0
  223. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/unary_ops.cuh +0 -0
  224. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device/utils.cuh +0 -0
  225. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device.cpp +0 -0
  226. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device.h +0 -0
  227. /data/{mlx → submodules/mlx}/mlx/backend/cuda/device_info.cpp +0 -0
  228. /data/{mlx → submodules/mlx}/mlx/backend/cuda/distributed.cu +0 -0
  229. /data/{mlx → submodules/mlx}/mlx/backend/cuda/eval.cpp +0 -0
  230. /data/{mlx → submodules/mlx}/mlx/backend/cuda/event.cu +0 -0
  231. /data/{mlx → submodules/mlx}/mlx/backend/cuda/event.h +0 -0
  232. /data/{mlx → submodules/mlx}/mlx/backend/cuda/fence.cpp +0 -0
  233. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/cublas_gemm.h +0 -0
  234. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/cublas_gemm_batched_12_0.cpp +0 -0
  235. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/cublas_gemm_batched_12_9.cu +0 -0
  236. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/gemv.cu +0 -0
  237. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/gemv.h +0 -0
  238. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/grouped_gemm.h +0 -0
  239. /data/{mlx → submodules/mlx}/mlx/backend/cuda/gemms/grouped_gemm_unaligned.cu +0 -0
  240. /data/{mlx → submodules/mlx}/mlx/backend/cuda/indexing.cpp +0 -0
  241. /data/{mlx → submodules/mlx}/mlx/backend/cuda/jit_module.cpp +0 -0
  242. /data/{mlx → submodules/mlx}/mlx/backend/cuda/jit_module.h +0 -0
  243. /data/{mlx → submodules/mlx}/mlx/backend/cuda/kernel_utils.cu +0 -0
  244. /data/{mlx → submodules/mlx}/mlx/backend/cuda/kernel_utils.cuh +0 -0
  245. /data/{mlx → submodules/mlx}/mlx/backend/cuda/layer_norm.cu +0 -0
  246. /data/{mlx → submodules/mlx}/mlx/backend/cuda/load.cpp +0 -0
  247. /data/{mlx → submodules/mlx}/mlx/backend/cuda/logsumexp.cu +0 -0
  248. /data/{mlx → submodules/mlx}/mlx/backend/cuda/lru_cache.h +0 -0
  249. /data/{mlx → submodules/mlx}/mlx/backend/cuda/matmul.cpp +0 -0
  250. /data/{mlx → submodules/mlx}/mlx/backend/cuda/no_cuda.cpp +0 -0
  251. /data/{mlx → submodules/mlx}/mlx/backend/cuda/primitives.cpp +0 -0
  252. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/affine_quantize.cu +0 -0
  253. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/convert_fp8.cu +0 -0
  254. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/cuda_fp4.h +0 -0
  255. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/mxfp8_quantize.cuh +0 -0
  256. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/nvfp4_quantize.cuh +0 -0
  257. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/qmv.cu +0 -0
  258. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/qmv.h +0 -0
  259. /data/{mlx → submodules/mlx}/mlx/backend/cuda/quantized/quantized_utils.h +0 -0
  260. /data/{mlx → submodules/mlx}/mlx/backend/cuda/random.cu +0 -0
  261. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/all_reduce.cu +0 -0
  262. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/col_reduce.cu +0 -0
  263. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/init_reduce.cu +0 -0
  264. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/reduce.cuh +0 -0
  265. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/reduce_ops.cuh +0 -0
  266. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/reduce_utils.cuh +0 -0
  267. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce/row_reduce.cu +0 -0
  268. /data/{mlx → submodules/mlx}/mlx/backend/cuda/reduce.cu +0 -0
  269. /data/{mlx → submodules/mlx}/mlx/backend/cuda/rms_norm.cu +0 -0
  270. /data/{mlx → submodules/mlx}/mlx/backend/cuda/rope.cu +0 -0
  271. /data/{mlx → submodules/mlx}/mlx/backend/cuda/scaled_dot_product_attention.cpp +0 -0
  272. /data/{mlx → submodules/mlx}/mlx/backend/cuda/scaled_dot_product_attention.cu +0 -0
  273. /data/{mlx → submodules/mlx}/mlx/backend/cuda/scan.cu +0 -0
  274. /data/{mlx → submodules/mlx}/mlx/backend/cuda/slicing.cpp +0 -0
  275. /data/{mlx → submodules/mlx}/mlx/backend/cuda/softmax.cu +0 -0
  276. /data/{mlx → submodules/mlx}/mlx/backend/cuda/sort.cu +0 -0
  277. /data/{mlx → submodules/mlx}/mlx/backend/cuda/steel/defines.cuh +0 -0
  278. /data/{mlx → submodules/mlx}/mlx/backend/cuda/steel/gemm.cuh +0 -0
  279. /data/{mlx → submodules/mlx}/mlx/backend/cuda/steel/mma.cuh +0 -0
  280. /data/{mlx → submodules/mlx}/mlx/backend/cuda/steel/tiles.cuh +0 -0
  281. /data/{mlx → submodules/mlx}/mlx/backend/cuda/steel/utils.cuh +0 -0
  282. /data/{mlx → submodules/mlx}/mlx/backend/cuda/ternary.cu +0 -0
  283. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/CMakeLists.txt +0 -0
  284. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/abs.cu +0 -0
  285. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arccos.cu +0 -0
  286. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arccosh.cu +0 -0
  287. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arcsin.cu +0 -0
  288. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arcsinh.cu +0 -0
  289. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arctan.cu +0 -0
  290. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/arctanh.cu +0 -0
  291. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/bitwise_invert.cu +0 -0
  292. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/ceil.cu +0 -0
  293. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/conjugate.cu +0 -0
  294. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/cos.cu +0 -0
  295. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/cosh.cu +0 -0
  296. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/erf.cu +0 -0
  297. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/erf_inv.cu +0 -0
  298. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/exp.cu +0 -0
  299. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/expm1.cu +0 -0
  300. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/floor.cu +0 -0
  301. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/imag.cu +0 -0
  302. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/log.cu +0 -0
  303. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/log1p.cu +0 -0
  304. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/logical_not.cu +0 -0
  305. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/negative.cu +0 -0
  306. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/real.cu +0 -0
  307. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/round.cu +0 -0
  308. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/sigmoid.cu +0 -0
  309. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/sign.cu +0 -0
  310. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/sin.cu +0 -0
  311. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/sinh.cu +0 -0
  312. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/sqrt.cu +0 -0
  313. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/square.cu +0 -0
  314. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/tan.cu +0 -0
  315. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/tanh.cu +0 -0
  316. /data/{mlx → submodules/mlx}/mlx/backend/cuda/unary/unary.cuh +0 -0
  317. /data/{mlx → submodules/mlx}/mlx/backend/cuda/utils.cpp +0 -0
  318. /data/{mlx → submodules/mlx}/mlx/backend/cuda/utils.h +0 -0
  319. /data/{mlx → submodules/mlx}/mlx/backend/cuda/vector_types.cuh +0 -0
  320. /data/{mlx → submodules/mlx}/mlx/backend/cuda/worker.cpp +0 -0
  321. /data/{mlx → submodules/mlx}/mlx/backend/cuda/worker.h +0 -0
  322. /data/{mlx → submodules/mlx}/mlx/backend/gpu/CMakeLists.txt +0 -0
  323. /data/{mlx → submodules/mlx}/mlx/backend/gpu/copy.cpp +0 -0
  324. /data/{mlx → submodules/mlx}/mlx/backend/gpu/copy.h +0 -0
  325. /data/{mlx → submodules/mlx}/mlx/backend/gpu/device_info.h +0 -0
  326. /data/{mlx → submodules/mlx}/mlx/backend/gpu/eval.h +0 -0
  327. /data/{mlx → submodules/mlx}/mlx/backend/gpu/primitives.cpp +0 -0
  328. /data/{mlx → submodules/mlx}/mlx/backend/gpu/slicing.cpp +0 -0
  329. /data/{mlx → submodules/mlx}/mlx/backend/gpu/slicing.h +0 -0
  330. /data/{mlx → submodules/mlx}/mlx/backend/metal/CMakeLists.txt +0 -0
  331. /data/{mlx → submodules/mlx}/mlx/backend/metal/allocator.cpp +0 -0
  332. /data/{mlx → submodules/mlx}/mlx/backend/metal/allocator.h +0 -0
  333. /data/{mlx → submodules/mlx}/mlx/backend/metal/binary.cpp +0 -0
  334. /data/{mlx → submodules/mlx}/mlx/backend/metal/binary.h +0 -0
  335. /data/{mlx → submodules/mlx}/mlx/backend/metal/compiled.cpp +0 -0
  336. /data/{mlx → submodules/mlx}/mlx/backend/metal/conv.cpp +0 -0
  337. /data/{mlx → submodules/mlx}/mlx/backend/metal/copy.cpp +0 -0
  338. /data/{mlx → submodules/mlx}/mlx/backend/metal/custom_kernel.cpp +0 -0
  339. /data/{mlx → submodules/mlx}/mlx/backend/metal/device.h +0 -0
  340. /data/{mlx → submodules/mlx}/mlx/backend/metal/device_info.cpp +0 -0
  341. /data/{mlx → submodules/mlx}/mlx/backend/metal/distributed.cpp +0 -0
  342. /data/{mlx → submodules/mlx}/mlx/backend/metal/eval.cpp +0 -0
  343. /data/{mlx → submodules/mlx}/mlx/backend/metal/event.cpp +0 -0
  344. /data/{mlx → submodules/mlx}/mlx/backend/metal/fence.cpp +0 -0
  345. /data/{mlx → submodules/mlx}/mlx/backend/metal/fft.cpp +0 -0
  346. /data/{mlx → submodules/mlx}/mlx/backend/metal/hadamard.cpp +0 -0
  347. /data/{mlx → submodules/mlx}/mlx/backend/metal/indexing.cpp +0 -0
  348. /data/{mlx → submodules/mlx}/mlx/backend/metal/jit/includes.h +0 -0
  349. /data/{mlx → submodules/mlx}/mlx/backend/metal/jit/indexing.h +0 -0
  350. /data/{mlx → submodules/mlx}/mlx/backend/metal/jit_kernels.cpp +0 -0
  351. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/CMakeLists.txt +0 -0
  352. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/arange.h +0 -0
  353. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/arange.metal +0 -0
  354. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/arg_reduce.metal +0 -0
  355. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/atomic.h +0 -0
  356. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/bf16.h +0 -0
  357. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/bf16_math.h +0 -0
  358. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/binary.h +0 -0
  359. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/binary.metal +0 -0
  360. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/binary_ops.h +0 -0
  361. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/binary_two.h +0 -0
  362. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/binary_two.metal +0 -0
  363. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/cexpf.h +0 -0
  364. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/complex.h +0 -0
  365. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/copy.h +0 -0
  366. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/copy.metal +0 -0
  367. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/defines.h +0 -0
  368. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/erf.h +0 -0
  369. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/expm1f.h +0 -0
  370. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fence.metal +0 -0
  371. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fft/radix.h +0 -0
  372. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fft/readwrite.h +0 -0
  373. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fft.h +0 -0
  374. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fft.metal +0 -0
  375. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp4.h +0 -0
  376. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp8.h +0 -0
  377. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp_quantized.h +0 -0
  378. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp_quantized.metal +0 -0
  379. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp_quantized_nax.h +0 -0
  380. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/fp_quantized_nax.metal +0 -0
  381. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/gemv.metal +0 -0
  382. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/gemv_masked.h +0 -0
  383. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/gemv_masked.metal +0 -0
  384. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/hadamard.h +0 -0
  385. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/gather.h +0 -0
  386. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/gather_axis.h +0 -0
  387. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/gather_front.h +0 -0
  388. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/indexing.h +0 -0
  389. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/masked_scatter.h +0 -0
  390. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/scatter.h +0 -0
  391. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/indexing/scatter_axis.h +0 -0
  392. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/layer_norm.metal +0 -0
  393. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/logging.h +0 -0
  394. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/logsumexp.h +0 -0
  395. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/logsumexp.metal +0 -0
  396. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/quantized.h +0 -0
  397. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/quantized.metal +0 -0
  398. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/quantized_nax.h +0 -0
  399. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/quantized_nax.metal +0 -0
  400. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/quantized_utils.h +0 -0
  401. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/random.metal +0 -0
  402. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduce.h +0 -0
  403. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduce.metal +0 -0
  404. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduce_utils.h +0 -0
  405. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduction/ops.h +0 -0
  406. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduction/reduce_all.h +0 -0
  407. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduction/reduce_col.h +0 -0
  408. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduction/reduce_init.h +0 -0
  409. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/reduction/reduce_row.h +0 -0
  410. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/rms_norm.metal +0 -0
  411. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/rope.metal +0 -0
  412. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/scaled_dot_product_attention.metal +0 -0
  413. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/scan.h +0 -0
  414. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/scan.metal +0 -0
  415. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/sdpa_vector.h +0 -0
  416. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/softmax.h +0 -0
  417. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/softmax.metal +0 -0
  418. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/sort.h +0 -0
  419. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/sort.metal +0 -0
  420. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/attn.h +0 -0
  421. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.h +0 -0
  422. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.metal +0 -0
  423. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.h +0 -0
  424. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.metal +0 -0
  425. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/loader.h +0 -0
  426. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/mma.h +0 -0
  427. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/nax.h +0 -0
  428. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/params.h +0 -0
  429. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/attn/transforms.h +0 -0
  430. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/conv.h +0 -0
  431. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.h +0 -0
  432. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.metal +0 -0
  433. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.h +0 -0
  434. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.metal +0 -0
  435. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/loader.h +0 -0
  436. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_l.h +0 -0
  437. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_n.h +0 -0
  438. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/loaders/loader_general.h +0 -0
  439. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/conv/params.h +0 -0
  440. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/defines.h +0 -0
  441. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/gemm.h +0 -0
  442. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/gemm_nax.h +0 -0
  443. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.h +0 -0
  444. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.metal +0 -0
  445. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.h +0 -0
  446. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.metal +0 -0
  447. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.h +0 -0
  448. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.metal +0 -0
  449. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.h +0 -0
  450. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.metal +0 -0
  451. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.h +0 -0
  452. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.metal +0 -0
  453. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.h +0 -0
  454. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.metal +0 -0
  455. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.h +0 -0
  456. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.metal +0 -0
  457. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.h +0 -0
  458. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.metal +0 -0
  459. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/loader.h +0 -0
  460. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/mma.h +0 -0
  461. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/nax.h +0 -0
  462. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/params.h +0 -0
  463. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/gemm/transforms.h +0 -0
  464. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/utils/integral_constant.h +0 -0
  465. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/utils/type_traits.h +0 -0
  466. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/steel/utils.h +0 -0
  467. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/ternary.h +0 -0
  468. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/ternary.metal +0 -0
  469. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/ternary_ops.h +0 -0
  470. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/unary.h +0 -0
  471. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/unary.metal +0 -0
  472. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/unary_ops.h +0 -0
  473. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels/utils.h +0 -0
  474. /data/{mlx → submodules/mlx}/mlx/backend/metal/kernels.h +0 -0
  475. /data/{mlx → submodules/mlx}/mlx/backend/metal/logsumexp.cpp +0 -0
  476. /data/{mlx → submodules/mlx}/mlx/backend/metal/make_compiled_preamble.sh +0 -0
  477. /data/{mlx → submodules/mlx}/mlx/backend/metal/matmul.cpp +0 -0
  478. /data/{mlx → submodules/mlx}/mlx/backend/metal/matmul.h +0 -0
  479. /data/{mlx → submodules/mlx}/mlx/backend/metal/metal.cpp +0 -0
  480. /data/{mlx → submodules/mlx}/mlx/backend/metal/metal.h +0 -0
  481. /data/{mlx → submodules/mlx}/mlx/backend/metal/no_metal.cpp +0 -0
  482. /data/{mlx → submodules/mlx}/mlx/backend/metal/nojit_kernels.cpp +0 -0
  483. /data/{mlx → submodules/mlx}/mlx/backend/metal/normalization.cpp +0 -0
  484. /data/{mlx → submodules/mlx}/mlx/backend/metal/primitives.cpp +0 -0
  485. /data/{mlx → submodules/mlx}/mlx/backend/metal/quantized.cpp +0 -0
  486. /data/{mlx → submodules/mlx}/mlx/backend/metal/reduce.cpp +0 -0
  487. /data/{mlx → submodules/mlx}/mlx/backend/metal/reduce.h +0 -0
  488. /data/{mlx → submodules/mlx}/mlx/backend/metal/resident.cpp +0 -0
  489. /data/{mlx → submodules/mlx}/mlx/backend/metal/resident.h +0 -0
  490. /data/{mlx → submodules/mlx}/mlx/backend/metal/rope.cpp +0 -0
  491. /data/{mlx → submodules/mlx}/mlx/backend/metal/scaled_dot_product_attention.cpp +0 -0
  492. /data/{mlx → submodules/mlx}/mlx/backend/metal/scan.cpp +0 -0
  493. /data/{mlx → submodules/mlx}/mlx/backend/metal/scan.h +0 -0
  494. /data/{mlx → submodules/mlx}/mlx/backend/metal/slicing.cpp +0 -0
  495. /data/{mlx → submodules/mlx}/mlx/backend/metal/softmax.cpp +0 -0
  496. /data/{mlx → submodules/mlx}/mlx/backend/metal/sort.cpp +0 -0
  497. /data/{mlx → submodules/mlx}/mlx/backend/metal/ternary.cpp +0 -0
  498. /data/{mlx → submodules/mlx}/mlx/backend/metal/ternary.h +0 -0
  499. /data/{mlx → submodules/mlx}/mlx/backend/metal/unary.cpp +0 -0
  500. /data/{mlx → submodules/mlx}/mlx/backend/metal/unary.h +0 -0
  501. /data/{mlx → submodules/mlx}/mlx/backend/metal/utils.cpp +0 -0
  502. /data/{mlx → submodules/mlx}/mlx/backend/metal/utils.h +0 -0
  503. /data/{mlx → submodules/mlx}/mlx/backend/no_cpu/CMakeLists.txt +0 -0
  504. /data/{mlx → submodules/mlx}/mlx/backend/no_cpu/compiled.cpp +0 -0
  505. /data/{mlx → submodules/mlx}/mlx/backend/no_cpu/device_info.cpp +0 -0
  506. /data/{mlx → submodules/mlx}/mlx/backend/no_cpu/primitives.cpp +0 -0
  507. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/CMakeLists.txt +0 -0
  508. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/allocator.cpp +0 -0
  509. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/apple_memory.h +0 -0
  510. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/device_info.cpp +0 -0
  511. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/eval.cpp +0 -0
  512. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/event.cpp +0 -0
  513. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/fence.cpp +0 -0
  514. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/linux_memory.h +0 -0
  515. /data/{mlx → submodules/mlx}/mlx/backend/no_gpu/primitives.cpp +0 -0
  516. /data/{mlx → submodules/mlx}/mlx/compile.cpp +0 -0
  517. /data/{mlx → submodules/mlx}/mlx/compile.h +0 -0
  518. /data/{mlx → submodules/mlx}/mlx/compile_impl.h +0 -0
  519. /data/{mlx → submodules/mlx}/mlx/device.cpp +0 -0
  520. /data/{mlx → submodules/mlx}/mlx/device.h +0 -0
  521. /data/{mlx → submodules/mlx}/mlx/distributed/CMakeLists.txt +0 -0
  522. /data/{mlx → submodules/mlx}/mlx/distributed/distributed.cpp +0 -0
  523. /data/{mlx → submodules/mlx}/mlx/distributed/distributed.h +0 -0
  524. /data/{mlx → submodules/mlx}/mlx/distributed/distributed_impl.h +0 -0
  525. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/CMakeLists.txt +0 -0
  526. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/jaccl.cpp +0 -0
  527. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/jaccl.h +0 -0
  528. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/mesh.cpp +0 -0
  529. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/mesh.h +0 -0
  530. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/no_jaccl.cpp +0 -0
  531. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/ring.cpp +0 -0
  532. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/ring.h +0 -0
  533. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/utils.cpp +0 -0
  534. /data/{mlx → submodules/mlx}/mlx/distributed/jaccl/utils.h +0 -0
  535. /data/{mlx → submodules/mlx}/mlx/distributed/mpi/CMakeLists.txt +0 -0
  536. /data/{mlx → submodules/mlx}/mlx/distributed/mpi/mpi.cpp +0 -0
  537. /data/{mlx → submodules/mlx}/mlx/distributed/mpi/mpi.h +0 -0
  538. /data/{mlx → submodules/mlx}/mlx/distributed/mpi/mpi_declarations.h +0 -0
  539. /data/{mlx → submodules/mlx}/mlx/distributed/mpi/no_mpi.cpp +0 -0
  540. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/CMakeLists.txt +0 -0
  541. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/nccl.cpp +0 -0
  542. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/nccl.h +0 -0
  543. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/nccl_stub/CMakeLists.txt +0 -0
  544. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/nccl_stub/nccl_stubs.cpp +0 -0
  545. /data/{mlx → submodules/mlx}/mlx/distributed/nccl/no_nccl.cpp +0 -0
  546. /data/{mlx → submodules/mlx}/mlx/distributed/ops.cpp +0 -0
  547. /data/{mlx → submodules/mlx}/mlx/distributed/ops.h +0 -0
  548. /data/{mlx → submodules/mlx}/mlx/distributed/primitives.cpp +0 -0
  549. /data/{mlx → submodules/mlx}/mlx/distributed/primitives.h +0 -0
  550. /data/{mlx → submodules/mlx}/mlx/distributed/reduction_ops.h +0 -0
  551. /data/{mlx → submodules/mlx}/mlx/distributed/ring/CMakeLists.txt +0 -0
  552. /data/{mlx → submodules/mlx}/mlx/distributed/ring/no_ring.cpp +0 -0
  553. /data/{mlx → submodules/mlx}/mlx/distributed/ring/ring.cpp +0 -0
  554. /data/{mlx → submodules/mlx}/mlx/distributed/ring/ring.h +0 -0
  555. /data/{mlx → submodules/mlx}/mlx/distributed/utils.cpp +0 -0
  556. /data/{mlx → submodules/mlx}/mlx/distributed/utils.h +0 -0
  557. /data/{mlx → submodules/mlx}/mlx/dtype.cpp +0 -0
  558. /data/{mlx → submodules/mlx}/mlx/dtype.h +0 -0
  559. /data/{mlx → submodules/mlx}/mlx/dtype_utils.cpp +0 -0
  560. /data/{mlx → submodules/mlx}/mlx/dtype_utils.h +0 -0
  561. /data/{mlx → submodules/mlx}/mlx/einsum.cpp +0 -0
  562. /data/{mlx → submodules/mlx}/mlx/einsum.h +0 -0
  563. /data/{mlx → submodules/mlx}/mlx/event.h +0 -0
  564. /data/{mlx → submodules/mlx}/mlx/export.h +0 -0
  565. /data/{mlx → submodules/mlx}/mlx/export_impl.h +0 -0
  566. /data/{mlx → submodules/mlx}/mlx/fast.cpp +0 -0
  567. /data/{mlx → submodules/mlx}/mlx/fast.h +0 -0
  568. /data/{mlx → submodules/mlx}/mlx/fast_primitives.h +0 -0
  569. /data/{mlx → submodules/mlx}/mlx/fence.h +0 -0
  570. /data/{mlx → submodules/mlx}/mlx/fft.cpp +0 -0
  571. /data/{mlx → submodules/mlx}/mlx/fft.h +0 -0
  572. /data/{mlx → submodules/mlx}/mlx/graph_utils.cpp +0 -0
  573. /data/{mlx → submodules/mlx}/mlx/graph_utils.h +0 -0
  574. /data/{mlx → submodules/mlx}/mlx/io/CMakeLists.txt +0 -0
  575. /data/{mlx → submodules/mlx}/mlx/io/gguf.cpp +0 -0
  576. /data/{mlx → submodules/mlx}/mlx/io/gguf.h +0 -0
  577. /data/{mlx → submodules/mlx}/mlx/io/gguf_quants.cpp +0 -0
  578. /data/{mlx → submodules/mlx}/mlx/io/load.cpp +0 -0
  579. /data/{mlx → submodules/mlx}/mlx/io/load.h +0 -0
  580. /data/{mlx → submodules/mlx}/mlx/io/no_gguf.cpp +0 -0
  581. /data/{mlx → submodules/mlx}/mlx/io/no_safetensors.cpp +0 -0
  582. /data/{mlx → submodules/mlx}/mlx/io/safetensors.cpp +0 -0
  583. /data/{mlx → submodules/mlx}/mlx/io.h +0 -0
  584. /data/{mlx → submodules/mlx}/mlx/linalg.cpp +0 -0
  585. /data/{mlx → submodules/mlx}/mlx/linalg.h +0 -0
  586. /data/{mlx → submodules/mlx}/mlx/memory.h +0 -0
  587. /data/{mlx → submodules/mlx}/mlx/mlx.h +0 -0
  588. /data/{mlx → submodules/mlx}/mlx/primitives.h +0 -0
  589. /data/{mlx → submodules/mlx}/mlx/random.cpp +0 -0
  590. /data/{mlx → submodules/mlx}/mlx/random.h +0 -0
  591. /data/{mlx → submodules/mlx}/mlx/small_vector.h +0 -0
  592. /data/{mlx → submodules/mlx}/mlx/threadpool.h +0 -0
  593. /data/{mlx → submodules/mlx}/mlx/transforms.cpp +0 -0
  594. /data/{mlx → submodules/mlx}/mlx/transforms.h +0 -0
  595. /data/{mlx → submodules/mlx}/mlx/transforms_impl.h +0 -0
  596. /data/{mlx → submodules/mlx}/mlx/types/bf16.h +0 -0
  597. /data/{mlx → submodules/mlx}/mlx/types/complex.h +0 -0
  598. /data/{mlx → submodules/mlx}/mlx/types/fp16.h +0 -0
  599. /data/{mlx → submodules/mlx}/mlx/types/half_types.h +0 -0
  600. /data/{mlx → submodules/mlx}/mlx/types/limits.h +0 -0
  601. /data/{mlx → submodules/mlx}/mlx/utils.cpp +0 -0
  602. /data/{mlx → submodules/mlx}/mlx/utils.h +0 -0
  603. /data/{mlx → submodules/mlx}/mlx/version.cpp +0 -0
  604. /data/{mlx → submodules/mlx}/mlx/version.h +0 -0
  605. /data/{mlx → submodules/mlx}/mlx.pc.in +0 -0
metadata CHANGED
@@ -1,15 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mlx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.30.7.2
4
+ version: 0.30.7.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - MLX Contributors
8
8
  - Aleksey Skryl
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-02-14 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 1980-01-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: base64
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ostruct
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: benchmark
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
13
83
  description: A Ruby wrapper for the native MLX machine learning runtime.
14
84
  email:
15
85
  - mlx@group.apple.com
@@ -19,23 +89,36 @@ extensions:
19
89
  - ext/mlx/extconf.rb
20
90
  extra_rdoc_files: []
21
91
  files:
92
+ - ext/mlx-onnx/native.cpp
93
+ - ext/mlx-onnx/native.hpp
22
94
  - ext/mlx/extconf.rb
23
95
  - ext/mlx/native.cpp
96
+ - lib/mlx-onnx/webgpu_harness.rb
24
97
  - lib/mlx.rb
25
98
  - lib/mlx/core.rb
26
99
  - lib/mlx/distributed_utils/common.rb
27
100
  - lib/mlx/distributed_utils/config.rb
28
101
  - lib/mlx/distributed_utils/launch.rb
29
102
  - lib/mlx/dsl.rb
103
+ - lib/mlx/dsl/attention.rb
30
104
  - lib/mlx/dsl/builder.rb
105
+ - lib/mlx/dsl/config_schema.rb
31
106
  - lib/mlx/dsl/data_pipeline.rb
32
107
  - lib/mlx/dsl/experiment.rb
108
+ - lib/mlx/dsl/generate.rb
33
109
  - lib/mlx/dsl/graph_modules.rb
110
+ - lib/mlx/dsl/kv_cache.rb
111
+ - lib/mlx/dsl/masks.rb
34
112
  - lib/mlx/dsl/model.rb
35
113
  - lib/mlx/dsl/model_mixin.rb
114
+ - lib/mlx/dsl/positions.rb
115
+ - lib/mlx/dsl/run_stack.rb
36
116
  - lib/mlx/dsl/split_plan.rb
117
+ - lib/mlx/dsl/tensor.rb
37
118
  - lib/mlx/dsl/train_step.rb
38
119
  - lib/mlx/dsl/trainer.rb
120
+ - lib/mlx/dsl/transformer_block.rb
121
+ - lib/mlx/dsl/weight_map.rb
39
122
  - lib/mlx/extension.rb
40
123
  - lib/mlx/nn.rb
41
124
  - lib/mlx/nn/base.rb
@@ -59,573 +142,588 @@ files:
59
142
  - lib/mlx/nn/layers/upsample.rb
60
143
  - lib/mlx/nn/losses.rb
61
144
  - lib/mlx/nn/utils.rb
145
+ - lib/mlx/onnx.rb
62
146
  - lib/mlx/optimizers.rb
63
147
  - lib/mlx/optimizers/optimizers.rb
64
148
  - lib/mlx/optimizers/schedulers.rb
65
149
  - lib/mlx/utils.rb
66
150
  - lib/mlx/version.rb
67
- - mlx/CMakeLists.txt
68
- - mlx/cmake/FindCUDNN.cmake
69
- - mlx/cmake/FindNCCL.cmake
70
- - mlx/cmake/Findnvpl.cmake
71
- - mlx/cmake/extension.cmake
72
- - mlx/mlx.pc.in
73
- - mlx/mlx/3rdparty/.clang-format
74
- - mlx/mlx/3rdparty/pocketfft.h
75
- - mlx/mlx/CMakeLists.txt
76
- - mlx/mlx/allocator.h
77
- - mlx/mlx/api.h
78
- - mlx/mlx/array.cpp
79
- - mlx/mlx/array.h
80
- - mlx/mlx/backend/common/CMakeLists.txt
81
- - mlx/mlx/backend/common/binary.h
82
- - mlx/mlx/backend/common/broadcasting.cpp
83
- - mlx/mlx/backend/common/broadcasting.h
84
- - mlx/mlx/backend/common/buffer_cache.h
85
- - mlx/mlx/backend/common/common.cpp
86
- - mlx/mlx/backend/common/compiled.cpp
87
- - mlx/mlx/backend/common/compiled.h
88
- - mlx/mlx/backend/common/copy.h
89
- - mlx/mlx/backend/common/hadamard.h
90
- - mlx/mlx/backend/common/load.cpp
91
- - mlx/mlx/backend/common/matmul.h
92
- - mlx/mlx/backend/common/reduce.cpp
93
- - mlx/mlx/backend/common/reduce.h
94
- - mlx/mlx/backend/common/slicing.cpp
95
- - mlx/mlx/backend/common/slicing.h
96
- - mlx/mlx/backend/common/ternary.h
97
- - mlx/mlx/backend/common/unary.h
98
- - mlx/mlx/backend/common/utils.cpp
99
- - mlx/mlx/backend/common/utils.h
100
- - mlx/mlx/backend/cpu/CMakeLists.txt
101
- - mlx/mlx/backend/cpu/arange.h
102
- - mlx/mlx/backend/cpu/arg_reduce.cpp
103
- - mlx/mlx/backend/cpu/binary.cpp
104
- - mlx/mlx/backend/cpu/binary.h
105
- - mlx/mlx/backend/cpu/binary_ops.h
106
- - mlx/mlx/backend/cpu/binary_two.h
107
- - mlx/mlx/backend/cpu/cholesky.cpp
108
- - mlx/mlx/backend/cpu/compiled.cpp
109
- - mlx/mlx/backend/cpu/compiled_preamble.h
110
- - mlx/mlx/backend/cpu/conv.cpp
111
- - mlx/mlx/backend/cpu/copy.cpp
112
- - mlx/mlx/backend/cpu/copy.h
113
- - mlx/mlx/backend/cpu/device_info.cpp
114
- - mlx/mlx/backend/cpu/device_info.h
115
- - mlx/mlx/backend/cpu/distributed.cpp
116
- - mlx/mlx/backend/cpu/eig.cpp
117
- - mlx/mlx/backend/cpu/eigh.cpp
118
- - mlx/mlx/backend/cpu/encoder.cpp
119
- - mlx/mlx/backend/cpu/encoder.h
120
- - mlx/mlx/backend/cpu/eval.cpp
121
- - mlx/mlx/backend/cpu/eval.h
122
- - mlx/mlx/backend/cpu/fft.cpp
123
- - mlx/mlx/backend/cpu/gemm.h
124
- - mlx/mlx/backend/cpu/gemms/bnns.cpp
125
- - mlx/mlx/backend/cpu/gemms/cblas.cpp
126
- - mlx/mlx/backend/cpu/gemms/simd_bf16.cpp
127
- - mlx/mlx/backend/cpu/gemms/simd_fp16.cpp
128
- - mlx/mlx/backend/cpu/gemms/simd_gemm.h
129
- - mlx/mlx/backend/cpu/hadamard.cpp
130
- - mlx/mlx/backend/cpu/indexing.cpp
131
- - mlx/mlx/backend/cpu/inverse.cpp
132
- - mlx/mlx/backend/cpu/jit_compiler.cpp
133
- - mlx/mlx/backend/cpu/jit_compiler.h
134
- - mlx/mlx/backend/cpu/lapack.h
135
- - mlx/mlx/backend/cpu/logsumexp.cpp
136
- - mlx/mlx/backend/cpu/luf.cpp
137
- - mlx/mlx/backend/cpu/make_compiled_preamble.ps1
138
- - mlx/mlx/backend/cpu/make_compiled_preamble.sh
139
- - mlx/mlx/backend/cpu/masked_mm.cpp
140
- - mlx/mlx/backend/cpu/matmul.cpp
141
- - mlx/mlx/backend/cpu/primitives.cpp
142
- - mlx/mlx/backend/cpu/qrf.cpp
143
- - mlx/mlx/backend/cpu/quantized.cpp
144
- - mlx/mlx/backend/cpu/reduce.cpp
145
- - mlx/mlx/backend/cpu/scan.cpp
146
- - mlx/mlx/backend/cpu/select.cpp
147
- - mlx/mlx/backend/cpu/simd/accelerate_fp16_simd.h
148
- - mlx/mlx/backend/cpu/simd/accelerate_simd.h
149
- - mlx/mlx/backend/cpu/simd/base_simd.h
150
- - mlx/mlx/backend/cpu/simd/math.h
151
- - mlx/mlx/backend/cpu/simd/neon_fp16_simd.h
152
- - mlx/mlx/backend/cpu/simd/simd.h
153
- - mlx/mlx/backend/cpu/simd/type.h
154
- - mlx/mlx/backend/cpu/slicing.h
155
- - mlx/mlx/backend/cpu/softmax.cpp
156
- - mlx/mlx/backend/cpu/sort.cpp
157
- - mlx/mlx/backend/cpu/svd.cpp
158
- - mlx/mlx/backend/cpu/ternary.h
159
- - mlx/mlx/backend/cpu/threefry.cpp
160
- - mlx/mlx/backend/cpu/threefry.h
161
- - mlx/mlx/backend/cpu/unary.cpp
162
- - mlx/mlx/backend/cpu/unary.h
163
- - mlx/mlx/backend/cpu/unary_ops.h
164
- - mlx/mlx/backend/cuda/CMakeLists.txt
165
- - mlx/mlx/backend/cuda/allocator.cpp
166
- - mlx/mlx/backend/cuda/allocator.h
167
- - mlx/mlx/backend/cuda/arange.cu
168
- - mlx/mlx/backend/cuda/arg_reduce.cu
169
- - mlx/mlx/backend/cuda/bin2h.cmake
170
- - mlx/mlx/backend/cuda/binary/CMakeLists.txt
171
- - mlx/mlx/backend/cuda/binary/add.cu
172
- - mlx/mlx/backend/cuda/binary/arctan2.cu
173
- - mlx/mlx/backend/cuda/binary/binary.cuh
174
- - mlx/mlx/backend/cuda/binary/bitwise_binary.cu
175
- - mlx/mlx/backend/cuda/binary/divide.cu
176
- - mlx/mlx/backend/cuda/binary/equal.cu
177
- - mlx/mlx/backend/cuda/binary/greater.cu
178
- - mlx/mlx/backend/cuda/binary/greater_equal.cu
179
- - mlx/mlx/backend/cuda/binary/less.cu
180
- - mlx/mlx/backend/cuda/binary/less_equal.cu
181
- - mlx/mlx/backend/cuda/binary/log_add_exp.cu
182
- - mlx/mlx/backend/cuda/binary/logical_and.cu
183
- - mlx/mlx/backend/cuda/binary/logical_or.cu
184
- - mlx/mlx/backend/cuda/binary/maximum.cu
185
- - mlx/mlx/backend/cuda/binary/minimum.cu
186
- - mlx/mlx/backend/cuda/binary/multiply.cu
187
- - mlx/mlx/backend/cuda/binary/not_equal.cu
188
- - mlx/mlx/backend/cuda/binary/power.cu
189
- - mlx/mlx/backend/cuda/binary/remainder.cu
190
- - mlx/mlx/backend/cuda/binary/subtract.cu
191
- - mlx/mlx/backend/cuda/binary_two.cu
192
- - mlx/mlx/backend/cuda/compiled.cpp
193
- - mlx/mlx/backend/cuda/conv.cpp
194
- - mlx/mlx/backend/cuda/conv/conv.h
195
- - mlx/mlx/backend/cuda/conv/gemm_conv.cu
196
- - mlx/mlx/backend/cuda/conv/gemm_grouped_conv.cu
197
- - mlx/mlx/backend/cuda/copy.cu
198
- - mlx/mlx/backend/cuda/copy/copy.cuh
199
- - mlx/mlx/backend/cuda/copy/copy_contiguous.cu
200
- - mlx/mlx/backend/cuda/copy/copy_general.cu
201
- - mlx/mlx/backend/cuda/copy/copy_general_dynamic.cu
202
- - mlx/mlx/backend/cuda/copy/copy_general_input.cu
203
- - mlx/mlx/backend/cuda/cublas_utils.cpp
204
- - mlx/mlx/backend/cuda/cublas_utils.h
205
- - mlx/mlx/backend/cuda/cuda.h
206
- - mlx/mlx/backend/cuda/cuda_utils.h
207
- - mlx/mlx/backend/cuda/cudnn_utils.cpp
208
- - mlx/mlx/backend/cuda/cudnn_utils.h
209
- - mlx/mlx/backend/cuda/custom_kernel.cpp
210
- - mlx/mlx/backend/cuda/cutlass_utils.cuh
211
- - mlx/mlx/backend/cuda/delayload.cpp
212
- - mlx/mlx/backend/cuda/device.cpp
213
- - mlx/mlx/backend/cuda/device.h
214
- - mlx/mlx/backend/cuda/device/atomic_ops.cuh
215
- - mlx/mlx/backend/cuda/device/binary_ops.cuh
216
- - mlx/mlx/backend/cuda/device/cast_op.cuh
217
- - mlx/mlx/backend/cuda/device/complex.cuh
218
- - mlx/mlx/backend/cuda/device/config.h
219
- - mlx/mlx/backend/cuda/device/fp16_math.cuh
220
- - mlx/mlx/backend/cuda/device/gather.cuh
221
- - mlx/mlx/backend/cuda/device/gather_axis.cuh
222
- - mlx/mlx/backend/cuda/device/indexing.cuh
223
- - mlx/mlx/backend/cuda/device/scatter.cuh
224
- - mlx/mlx/backend/cuda/device/scatter_axis.cuh
225
- - mlx/mlx/backend/cuda/device/scatter_ops.cuh
226
- - mlx/mlx/backend/cuda/device/ternary_ops.cuh
227
- - mlx/mlx/backend/cuda/device/unary_ops.cuh
228
- - mlx/mlx/backend/cuda/device/utils.cuh
229
- - mlx/mlx/backend/cuda/device_info.cpp
230
- - mlx/mlx/backend/cuda/distributed.cu
231
- - mlx/mlx/backend/cuda/eval.cpp
232
- - mlx/mlx/backend/cuda/event.cu
233
- - mlx/mlx/backend/cuda/event.h
234
- - mlx/mlx/backend/cuda/fence.cpp
235
- - mlx/mlx/backend/cuda/gemms/cublas_gemm.cpp
236
- - mlx/mlx/backend/cuda/gemms/cublas_gemm.h
237
- - mlx/mlx/backend/cuda/gemms/cublas_gemm_batched_12_0.cpp
238
- - mlx/mlx/backend/cuda/gemms/cublas_gemm_batched_12_9.cu
239
- - mlx/mlx/backend/cuda/gemms/gemv.cu
240
- - mlx/mlx/backend/cuda/gemms/gemv.h
241
- - mlx/mlx/backend/cuda/gemms/grouped_gemm.h
242
- - mlx/mlx/backend/cuda/gemms/grouped_gemm_unaligned.cu
243
- - mlx/mlx/backend/cuda/indexing.cpp
244
- - mlx/mlx/backend/cuda/jit_module.cpp
245
- - mlx/mlx/backend/cuda/jit_module.h
246
- - mlx/mlx/backend/cuda/kernel_utils.cu
247
- - mlx/mlx/backend/cuda/kernel_utils.cuh
248
- - mlx/mlx/backend/cuda/layer_norm.cu
249
- - mlx/mlx/backend/cuda/load.cpp
250
- - mlx/mlx/backend/cuda/logsumexp.cu
251
- - mlx/mlx/backend/cuda/lru_cache.h
252
- - mlx/mlx/backend/cuda/matmul.cpp
253
- - mlx/mlx/backend/cuda/no_cuda.cpp
254
- - mlx/mlx/backend/cuda/primitives.cpp
255
- - mlx/mlx/backend/cuda/quantized/affine_quantize.cu
256
- - mlx/mlx/backend/cuda/quantized/convert_fp8.cu
257
- - mlx/mlx/backend/cuda/quantized/cublas_qqmm.cpp
258
- - mlx/mlx/backend/cuda/quantized/cublas_qqmm.h
259
- - mlx/mlx/backend/cuda/quantized/cuda_fp4.h
260
- - mlx/mlx/backend/cuda/quantized/fp_quantize.cu
261
- - mlx/mlx/backend/cuda/quantized/mxfp8_quantize.cuh
262
- - mlx/mlx/backend/cuda/quantized/no_qqmm_impl.cpp
263
- - mlx/mlx/backend/cuda/quantized/nvfp4_quantize.cuh
264
- - mlx/mlx/backend/cuda/quantized/qmv.cu
265
- - mlx/mlx/backend/cuda/quantized/qmv.h
266
- - mlx/mlx/backend/cuda/quantized/qqmm.cpp
267
- - mlx/mlx/backend/cuda/quantized/qqmm_impl.cpp
268
- - mlx/mlx/backend/cuda/quantized/qqmm_impl.h
269
- - mlx/mlx/backend/cuda/quantized/qqmm_utils.cu
270
- - mlx/mlx/backend/cuda/quantized/qqmm_utils.h
271
- - mlx/mlx/backend/cuda/quantized/quantized.cpp
272
- - mlx/mlx/backend/cuda/quantized/quantized.h
273
- - mlx/mlx/backend/cuda/quantized/quantized_utils.cuh
274
- - mlx/mlx/backend/cuda/quantized/quantized_utils.h
275
- - mlx/mlx/backend/cuda/random.cu
276
- - mlx/mlx/backend/cuda/reduce.cu
277
- - mlx/mlx/backend/cuda/reduce/all_reduce.cu
278
- - mlx/mlx/backend/cuda/reduce/col_reduce.cu
279
- - mlx/mlx/backend/cuda/reduce/init_reduce.cu
280
- - mlx/mlx/backend/cuda/reduce/reduce.cuh
281
- - mlx/mlx/backend/cuda/reduce/reduce_ops.cuh
282
- - mlx/mlx/backend/cuda/reduce/reduce_utils.cuh
283
- - mlx/mlx/backend/cuda/reduce/row_reduce.cu
284
- - mlx/mlx/backend/cuda/rms_norm.cu
285
- - mlx/mlx/backend/cuda/rope.cu
286
- - mlx/mlx/backend/cuda/scaled_dot_product_attention.cpp
287
- - mlx/mlx/backend/cuda/scaled_dot_product_attention.cu
288
- - mlx/mlx/backend/cuda/scan.cu
289
- - mlx/mlx/backend/cuda/slicing.cpp
290
- - mlx/mlx/backend/cuda/softmax.cu
291
- - mlx/mlx/backend/cuda/sort.cu
292
- - mlx/mlx/backend/cuda/steel/defines.cuh
293
- - mlx/mlx/backend/cuda/steel/gemm.cuh
294
- - mlx/mlx/backend/cuda/steel/mma.cuh
295
- - mlx/mlx/backend/cuda/steel/tiles.cuh
296
- - mlx/mlx/backend/cuda/steel/utils.cuh
297
- - mlx/mlx/backend/cuda/ternary.cu
298
- - mlx/mlx/backend/cuda/unary/CMakeLists.txt
299
- - mlx/mlx/backend/cuda/unary/abs.cu
300
- - mlx/mlx/backend/cuda/unary/arccos.cu
301
- - mlx/mlx/backend/cuda/unary/arccosh.cu
302
- - mlx/mlx/backend/cuda/unary/arcsin.cu
303
- - mlx/mlx/backend/cuda/unary/arcsinh.cu
304
- - mlx/mlx/backend/cuda/unary/arctan.cu
305
- - mlx/mlx/backend/cuda/unary/arctanh.cu
306
- - mlx/mlx/backend/cuda/unary/bitwise_invert.cu
307
- - mlx/mlx/backend/cuda/unary/ceil.cu
308
- - mlx/mlx/backend/cuda/unary/conjugate.cu
309
- - mlx/mlx/backend/cuda/unary/cos.cu
310
- - mlx/mlx/backend/cuda/unary/cosh.cu
311
- - mlx/mlx/backend/cuda/unary/erf.cu
312
- - mlx/mlx/backend/cuda/unary/erf_inv.cu
313
- - mlx/mlx/backend/cuda/unary/exp.cu
314
- - mlx/mlx/backend/cuda/unary/expm1.cu
315
- - mlx/mlx/backend/cuda/unary/floor.cu
316
- - mlx/mlx/backend/cuda/unary/imag.cu
317
- - mlx/mlx/backend/cuda/unary/log.cu
318
- - mlx/mlx/backend/cuda/unary/log1p.cu
319
- - mlx/mlx/backend/cuda/unary/logical_not.cu
320
- - mlx/mlx/backend/cuda/unary/negative.cu
321
- - mlx/mlx/backend/cuda/unary/real.cu
322
- - mlx/mlx/backend/cuda/unary/round.cu
323
- - mlx/mlx/backend/cuda/unary/sigmoid.cu
324
- - mlx/mlx/backend/cuda/unary/sign.cu
325
- - mlx/mlx/backend/cuda/unary/sin.cu
326
- - mlx/mlx/backend/cuda/unary/sinh.cu
327
- - mlx/mlx/backend/cuda/unary/sqrt.cu
328
- - mlx/mlx/backend/cuda/unary/square.cu
329
- - mlx/mlx/backend/cuda/unary/tan.cu
330
- - mlx/mlx/backend/cuda/unary/tanh.cu
331
- - mlx/mlx/backend/cuda/unary/unary.cuh
332
- - mlx/mlx/backend/cuda/utils.cpp
333
- - mlx/mlx/backend/cuda/utils.h
334
- - mlx/mlx/backend/cuda/vector_types.cuh
335
- - mlx/mlx/backend/cuda/worker.cpp
336
- - mlx/mlx/backend/cuda/worker.h
337
- - mlx/mlx/backend/gpu/CMakeLists.txt
338
- - mlx/mlx/backend/gpu/copy.cpp
339
- - mlx/mlx/backend/gpu/copy.h
340
- - mlx/mlx/backend/gpu/device_info.h
341
- - mlx/mlx/backend/gpu/eval.h
342
- - mlx/mlx/backend/gpu/primitives.cpp
343
- - mlx/mlx/backend/gpu/slicing.cpp
344
- - mlx/mlx/backend/gpu/slicing.h
345
- - mlx/mlx/backend/metal/CMakeLists.txt
346
- - mlx/mlx/backend/metal/allocator.cpp
347
- - mlx/mlx/backend/metal/allocator.h
348
- - mlx/mlx/backend/metal/binary.cpp
349
- - mlx/mlx/backend/metal/binary.h
350
- - mlx/mlx/backend/metal/compiled.cpp
351
- - mlx/mlx/backend/metal/conv.cpp
352
- - mlx/mlx/backend/metal/copy.cpp
353
- - mlx/mlx/backend/metal/custom_kernel.cpp
354
- - mlx/mlx/backend/metal/device.cpp
355
- - mlx/mlx/backend/metal/device.h
356
- - mlx/mlx/backend/metal/device_info.cpp
357
- - mlx/mlx/backend/metal/distributed.cpp
358
- - mlx/mlx/backend/metal/eval.cpp
359
- - mlx/mlx/backend/metal/event.cpp
360
- - mlx/mlx/backend/metal/fence.cpp
361
- - mlx/mlx/backend/metal/fft.cpp
362
- - mlx/mlx/backend/metal/hadamard.cpp
363
- - mlx/mlx/backend/metal/indexing.cpp
364
- - mlx/mlx/backend/metal/jit/includes.h
365
- - mlx/mlx/backend/metal/jit/indexing.h
366
- - mlx/mlx/backend/metal/jit_kernels.cpp
367
- - mlx/mlx/backend/metal/kernels.h
368
- - mlx/mlx/backend/metal/kernels/CMakeLists.txt
369
- - mlx/mlx/backend/metal/kernels/arange.h
370
- - mlx/mlx/backend/metal/kernels/arange.metal
371
- - mlx/mlx/backend/metal/kernels/arg_reduce.metal
372
- - mlx/mlx/backend/metal/kernels/atomic.h
373
- - mlx/mlx/backend/metal/kernels/bf16.h
374
- - mlx/mlx/backend/metal/kernels/bf16_math.h
375
- - mlx/mlx/backend/metal/kernels/binary.h
376
- - mlx/mlx/backend/metal/kernels/binary.metal
377
- - mlx/mlx/backend/metal/kernels/binary_ops.h
378
- - mlx/mlx/backend/metal/kernels/binary_two.h
379
- - mlx/mlx/backend/metal/kernels/binary_two.metal
380
- - mlx/mlx/backend/metal/kernels/cexpf.h
381
- - mlx/mlx/backend/metal/kernels/complex.h
382
- - mlx/mlx/backend/metal/kernels/conv.metal
383
- - mlx/mlx/backend/metal/kernels/copy.h
384
- - mlx/mlx/backend/metal/kernels/copy.metal
385
- - mlx/mlx/backend/metal/kernels/defines.h
386
- - mlx/mlx/backend/metal/kernels/erf.h
387
- - mlx/mlx/backend/metal/kernels/expm1f.h
388
- - mlx/mlx/backend/metal/kernels/fence.metal
389
- - mlx/mlx/backend/metal/kernels/fft.h
390
- - mlx/mlx/backend/metal/kernels/fft.metal
391
- - mlx/mlx/backend/metal/kernels/fft/radix.h
392
- - mlx/mlx/backend/metal/kernels/fft/readwrite.h
393
- - mlx/mlx/backend/metal/kernels/fp4.h
394
- - mlx/mlx/backend/metal/kernels/fp8.h
395
- - mlx/mlx/backend/metal/kernels/fp_quantized.h
396
- - mlx/mlx/backend/metal/kernels/fp_quantized.metal
397
- - mlx/mlx/backend/metal/kernels/fp_quantized_nax.h
398
- - mlx/mlx/backend/metal/kernels/fp_quantized_nax.metal
399
- - mlx/mlx/backend/metal/kernels/gemv.metal
400
- - mlx/mlx/backend/metal/kernels/gemv_masked.h
401
- - mlx/mlx/backend/metal/kernels/gemv_masked.metal
402
- - mlx/mlx/backend/metal/kernels/hadamard.h
403
- - mlx/mlx/backend/metal/kernels/indexing/gather.h
404
- - mlx/mlx/backend/metal/kernels/indexing/gather_axis.h
405
- - mlx/mlx/backend/metal/kernels/indexing/gather_front.h
406
- - mlx/mlx/backend/metal/kernels/indexing/indexing.h
407
- - mlx/mlx/backend/metal/kernels/indexing/masked_scatter.h
408
- - mlx/mlx/backend/metal/kernels/indexing/scatter.h
409
- - mlx/mlx/backend/metal/kernels/indexing/scatter_axis.h
410
- - mlx/mlx/backend/metal/kernels/layer_norm.metal
411
- - mlx/mlx/backend/metal/kernels/logging.h
412
- - mlx/mlx/backend/metal/kernels/logsumexp.h
413
- - mlx/mlx/backend/metal/kernels/logsumexp.metal
414
- - mlx/mlx/backend/metal/kernels/quantized.h
415
- - mlx/mlx/backend/metal/kernels/quantized.metal
416
- - mlx/mlx/backend/metal/kernels/quantized_nax.h
417
- - mlx/mlx/backend/metal/kernels/quantized_nax.metal
418
- - mlx/mlx/backend/metal/kernels/quantized_utils.h
419
- - mlx/mlx/backend/metal/kernels/random.metal
420
- - mlx/mlx/backend/metal/kernels/reduce.h
421
- - mlx/mlx/backend/metal/kernels/reduce.metal
422
- - mlx/mlx/backend/metal/kernels/reduce_utils.h
423
- - mlx/mlx/backend/metal/kernels/reduction/ops.h
424
- - mlx/mlx/backend/metal/kernels/reduction/reduce_all.h
425
- - mlx/mlx/backend/metal/kernels/reduction/reduce_col.h
426
- - mlx/mlx/backend/metal/kernels/reduction/reduce_init.h
427
- - mlx/mlx/backend/metal/kernels/reduction/reduce_row.h
428
- - mlx/mlx/backend/metal/kernels/rms_norm.metal
429
- - mlx/mlx/backend/metal/kernels/rope.metal
430
- - mlx/mlx/backend/metal/kernels/scaled_dot_product_attention.metal
431
- - mlx/mlx/backend/metal/kernels/scan.h
432
- - mlx/mlx/backend/metal/kernels/scan.metal
433
- - mlx/mlx/backend/metal/kernels/sdpa_vector.h
434
- - mlx/mlx/backend/metal/kernels/softmax.h
435
- - mlx/mlx/backend/metal/kernels/softmax.metal
436
- - mlx/mlx/backend/metal/kernels/sort.h
437
- - mlx/mlx/backend/metal/kernels/sort.metal
438
- - mlx/mlx/backend/metal/kernels/steel/attn/attn.h
439
- - mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.h
440
- - mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.metal
441
- - mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.h
442
- - mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.metal
443
- - mlx/mlx/backend/metal/kernels/steel/attn/loader.h
444
- - mlx/mlx/backend/metal/kernels/steel/attn/mma.h
445
- - mlx/mlx/backend/metal/kernels/steel/attn/nax.h
446
- - mlx/mlx/backend/metal/kernels/steel/attn/params.h
447
- - mlx/mlx/backend/metal/kernels/steel/attn/transforms.h
448
- - mlx/mlx/backend/metal/kernels/steel/conv/conv.h
449
- - mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.h
450
- - mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.metal
451
- - mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.h
452
- - mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.metal
453
- - mlx/mlx/backend/metal/kernels/steel/conv/loader.h
454
- - mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_l.h
455
- - mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_n.h
456
- - mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_general.h
457
- - mlx/mlx/backend/metal/kernels/steel/conv/params.h
458
- - mlx/mlx/backend/metal/kernels/steel/defines.h
459
- - mlx/mlx/backend/metal/kernels/steel/gemm/gemm.h
460
- - mlx/mlx/backend/metal/kernels/steel/gemm/gemm_nax.h
461
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.h
462
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.metal
463
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.h
464
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.metal
465
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.h
466
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.metal
467
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.h
468
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.metal
469
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.h
470
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.metal
471
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.h
472
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.metal
473
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.h
474
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.metal
475
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.h
476
- - mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.metal
477
- - mlx/mlx/backend/metal/kernels/steel/gemm/loader.h
478
- - mlx/mlx/backend/metal/kernels/steel/gemm/mma.h
479
- - mlx/mlx/backend/metal/kernels/steel/gemm/nax.h
480
- - mlx/mlx/backend/metal/kernels/steel/gemm/params.h
481
- - mlx/mlx/backend/metal/kernels/steel/gemm/transforms.h
482
- - mlx/mlx/backend/metal/kernels/steel/utils.h
483
- - mlx/mlx/backend/metal/kernels/steel/utils/integral_constant.h
484
- - mlx/mlx/backend/metal/kernels/steel/utils/type_traits.h
485
- - mlx/mlx/backend/metal/kernels/ternary.h
486
- - mlx/mlx/backend/metal/kernels/ternary.metal
487
- - mlx/mlx/backend/metal/kernels/ternary_ops.h
488
- - mlx/mlx/backend/metal/kernels/unary.h
489
- - mlx/mlx/backend/metal/kernels/unary.metal
490
- - mlx/mlx/backend/metal/kernels/unary_ops.h
491
- - mlx/mlx/backend/metal/kernels/utils.h
492
- - mlx/mlx/backend/metal/logsumexp.cpp
493
- - mlx/mlx/backend/metal/make_compiled_preamble.sh
494
- - mlx/mlx/backend/metal/matmul.cpp
495
- - mlx/mlx/backend/metal/matmul.h
496
- - mlx/mlx/backend/metal/metal.cpp
497
- - mlx/mlx/backend/metal/metal.h
498
- - mlx/mlx/backend/metal/no_metal.cpp
499
- - mlx/mlx/backend/metal/nojit_kernels.cpp
500
- - mlx/mlx/backend/metal/normalization.cpp
501
- - mlx/mlx/backend/metal/primitives.cpp
502
- - mlx/mlx/backend/metal/quantized.cpp
503
- - mlx/mlx/backend/metal/reduce.cpp
504
- - mlx/mlx/backend/metal/reduce.h
505
- - mlx/mlx/backend/metal/resident.cpp
506
- - mlx/mlx/backend/metal/resident.h
507
- - mlx/mlx/backend/metal/rope.cpp
508
- - mlx/mlx/backend/metal/scaled_dot_product_attention.cpp
509
- - mlx/mlx/backend/metal/scan.cpp
510
- - mlx/mlx/backend/metal/scan.h
511
- - mlx/mlx/backend/metal/slicing.cpp
512
- - mlx/mlx/backend/metal/softmax.cpp
513
- - mlx/mlx/backend/metal/sort.cpp
514
- - mlx/mlx/backend/metal/ternary.cpp
515
- - mlx/mlx/backend/metal/ternary.h
516
- - mlx/mlx/backend/metal/unary.cpp
517
- - mlx/mlx/backend/metal/unary.h
518
- - mlx/mlx/backend/metal/utils.cpp
519
- - mlx/mlx/backend/metal/utils.h
520
- - mlx/mlx/backend/no_cpu/CMakeLists.txt
521
- - mlx/mlx/backend/no_cpu/compiled.cpp
522
- - mlx/mlx/backend/no_cpu/device_info.cpp
523
- - mlx/mlx/backend/no_cpu/primitives.cpp
524
- - mlx/mlx/backend/no_gpu/CMakeLists.txt
525
- - mlx/mlx/backend/no_gpu/allocator.cpp
526
- - mlx/mlx/backend/no_gpu/apple_memory.h
527
- - mlx/mlx/backend/no_gpu/device_info.cpp
528
- - mlx/mlx/backend/no_gpu/eval.cpp
529
- - mlx/mlx/backend/no_gpu/event.cpp
530
- - mlx/mlx/backend/no_gpu/fence.cpp
531
- - mlx/mlx/backend/no_gpu/linux_memory.h
532
- - mlx/mlx/backend/no_gpu/primitives.cpp
533
- - mlx/mlx/compile.cpp
534
- - mlx/mlx/compile.h
535
- - mlx/mlx/compile_impl.h
536
- - mlx/mlx/device.cpp
537
- - mlx/mlx/device.h
538
- - mlx/mlx/distributed/CMakeLists.txt
539
- - mlx/mlx/distributed/distributed.cpp
540
- - mlx/mlx/distributed/distributed.h
541
- - mlx/mlx/distributed/distributed_impl.h
542
- - mlx/mlx/distributed/jaccl/CMakeLists.txt
543
- - mlx/mlx/distributed/jaccl/jaccl.cpp
544
- - mlx/mlx/distributed/jaccl/jaccl.h
545
- - mlx/mlx/distributed/jaccl/mesh.cpp
546
- - mlx/mlx/distributed/jaccl/mesh.h
547
- - mlx/mlx/distributed/jaccl/no_jaccl.cpp
548
- - mlx/mlx/distributed/jaccl/ring.cpp
549
- - mlx/mlx/distributed/jaccl/ring.h
550
- - mlx/mlx/distributed/jaccl/utils.cpp
551
- - mlx/mlx/distributed/jaccl/utils.h
552
- - mlx/mlx/distributed/mpi/CMakeLists.txt
553
- - mlx/mlx/distributed/mpi/mpi.cpp
554
- - mlx/mlx/distributed/mpi/mpi.h
555
- - mlx/mlx/distributed/mpi/mpi_declarations.h
556
- - mlx/mlx/distributed/mpi/no_mpi.cpp
557
- - mlx/mlx/distributed/nccl/CMakeLists.txt
558
- - mlx/mlx/distributed/nccl/nccl.cpp
559
- - mlx/mlx/distributed/nccl/nccl.h
560
- - mlx/mlx/distributed/nccl/nccl_stub/CMakeLists.txt
561
- - mlx/mlx/distributed/nccl/nccl_stub/nccl_stubs.cpp
562
- - mlx/mlx/distributed/nccl/no_nccl.cpp
563
- - mlx/mlx/distributed/ops.cpp
564
- - mlx/mlx/distributed/ops.h
565
- - mlx/mlx/distributed/primitives.cpp
566
- - mlx/mlx/distributed/primitives.h
567
- - mlx/mlx/distributed/reduction_ops.h
568
- - mlx/mlx/distributed/ring/CMakeLists.txt
569
- - mlx/mlx/distributed/ring/no_ring.cpp
570
- - mlx/mlx/distributed/ring/ring.cpp
571
- - mlx/mlx/distributed/ring/ring.h
572
- - mlx/mlx/distributed/utils.cpp
573
- - mlx/mlx/distributed/utils.h
574
- - mlx/mlx/dtype.cpp
575
- - mlx/mlx/dtype.h
576
- - mlx/mlx/dtype_utils.cpp
577
- - mlx/mlx/dtype_utils.h
578
- - mlx/mlx/einsum.cpp
579
- - mlx/mlx/einsum.h
580
- - mlx/mlx/event.h
581
- - mlx/mlx/export.cpp
582
- - mlx/mlx/export.h
583
- - mlx/mlx/export_impl.h
584
- - mlx/mlx/fast.cpp
585
- - mlx/mlx/fast.h
586
- - mlx/mlx/fast_primitives.h
587
- - mlx/mlx/fence.h
588
- - mlx/mlx/fft.cpp
589
- - mlx/mlx/fft.h
590
- - mlx/mlx/graph_utils.cpp
591
- - mlx/mlx/graph_utils.h
592
- - mlx/mlx/io.h
593
- - mlx/mlx/io/CMakeLists.txt
594
- - mlx/mlx/io/gguf.cpp
595
- - mlx/mlx/io/gguf.h
596
- - mlx/mlx/io/gguf_quants.cpp
597
- - mlx/mlx/io/load.cpp
598
- - mlx/mlx/io/load.h
599
- - mlx/mlx/io/no_gguf.cpp
600
- - mlx/mlx/io/no_safetensors.cpp
601
- - mlx/mlx/io/safetensors.cpp
602
- - mlx/mlx/linalg.cpp
603
- - mlx/mlx/linalg.h
604
- - mlx/mlx/memory.h
605
- - mlx/mlx/mlx.h
606
- - mlx/mlx/ops.cpp
607
- - mlx/mlx/ops.h
608
- - mlx/mlx/primitives.cpp
609
- - mlx/mlx/primitives.h
610
- - mlx/mlx/random.cpp
611
- - mlx/mlx/random.h
612
- - mlx/mlx/scheduler.cpp
613
- - mlx/mlx/scheduler.h
614
- - mlx/mlx/small_vector.h
615
- - mlx/mlx/stream.h
616
- - mlx/mlx/threadpool.h
617
- - mlx/mlx/transforms.cpp
618
- - mlx/mlx/transforms.h
619
- - mlx/mlx/transforms_impl.h
620
- - mlx/mlx/types/bf16.h
621
- - mlx/mlx/types/complex.h
622
- - mlx/mlx/types/fp16.h
623
- - mlx/mlx/types/half_types.h
624
- - mlx/mlx/types/limits.h
625
- - mlx/mlx/utils.cpp
626
- - mlx/mlx/utils.h
627
- - mlx/mlx/version.cpp
628
- - mlx/mlx/version.h
151
+ - submodules/mlx-onnx/CMakeLists.txt
152
+ - submodules/mlx-onnx/LICENSE
153
+ - submodules/mlx-onnx/include/mlx/ir.hpp
154
+ - submodules/mlx-onnx/src/api.cpp
155
+ - submodules/mlx-onnx/src/compat.cpp
156
+ - submodules/mlx-onnx/src/detail.hpp
157
+ - submodules/mlx-onnx/src/export.cpp
158
+ - submodules/mlx-onnx/src/io.cpp
159
+ - submodules/mlx-onnx/src/json.hpp
160
+ - submodules/mlx-onnx/src/lowering.cpp
161
+ - submodules/mlx-onnx/src/mappings.cpp
162
+ - submodules/mlx-onnx/src/mappings.hpp
163
+ - submodules/mlx-onnx/src/onnx.cpp
164
+ - submodules/mlx-onnx/src/shared.cpp
165
+ - submodules/mlx/CMakeLists.txt
166
+ - submodules/mlx/cmake/FindCUDNN.cmake
167
+ - submodules/mlx/cmake/FindNCCL.cmake
168
+ - submodules/mlx/cmake/Findnvpl.cmake
169
+ - submodules/mlx/cmake/extension.cmake
170
+ - submodules/mlx/mlx.pc.in
171
+ - submodules/mlx/mlx/3rdparty/.clang-format
172
+ - submodules/mlx/mlx/3rdparty/pocketfft.h
173
+ - submodules/mlx/mlx/CMakeLists.txt
174
+ - submodules/mlx/mlx/allocator.h
175
+ - submodules/mlx/mlx/api.h
176
+ - submodules/mlx/mlx/array.cpp
177
+ - submodules/mlx/mlx/array.h
178
+ - submodules/mlx/mlx/backend/common/CMakeLists.txt
179
+ - submodules/mlx/mlx/backend/common/binary.h
180
+ - submodules/mlx/mlx/backend/common/broadcasting.cpp
181
+ - submodules/mlx/mlx/backend/common/broadcasting.h
182
+ - submodules/mlx/mlx/backend/common/buffer_cache.h
183
+ - submodules/mlx/mlx/backend/common/common.cpp
184
+ - submodules/mlx/mlx/backend/common/compiled.cpp
185
+ - submodules/mlx/mlx/backend/common/compiled.h
186
+ - submodules/mlx/mlx/backend/common/copy.h
187
+ - submodules/mlx/mlx/backend/common/hadamard.h
188
+ - submodules/mlx/mlx/backend/common/load.cpp
189
+ - submodules/mlx/mlx/backend/common/matmul.h
190
+ - submodules/mlx/mlx/backend/common/reduce.cpp
191
+ - submodules/mlx/mlx/backend/common/reduce.h
192
+ - submodules/mlx/mlx/backend/common/slicing.cpp
193
+ - submodules/mlx/mlx/backend/common/slicing.h
194
+ - submodules/mlx/mlx/backend/common/ternary.h
195
+ - submodules/mlx/mlx/backend/common/unary.h
196
+ - submodules/mlx/mlx/backend/common/utils.cpp
197
+ - submodules/mlx/mlx/backend/common/utils.h
198
+ - submodules/mlx/mlx/backend/cpu/CMakeLists.txt
199
+ - submodules/mlx/mlx/backend/cpu/arange.h
200
+ - submodules/mlx/mlx/backend/cpu/arg_reduce.cpp
201
+ - submodules/mlx/mlx/backend/cpu/binary.cpp
202
+ - submodules/mlx/mlx/backend/cpu/binary.h
203
+ - submodules/mlx/mlx/backend/cpu/binary_ops.h
204
+ - submodules/mlx/mlx/backend/cpu/binary_two.h
205
+ - submodules/mlx/mlx/backend/cpu/cholesky.cpp
206
+ - submodules/mlx/mlx/backend/cpu/compiled.cpp
207
+ - submodules/mlx/mlx/backend/cpu/compiled_preamble.h
208
+ - submodules/mlx/mlx/backend/cpu/conv.cpp
209
+ - submodules/mlx/mlx/backend/cpu/copy.cpp
210
+ - submodules/mlx/mlx/backend/cpu/copy.h
211
+ - submodules/mlx/mlx/backend/cpu/device_info.cpp
212
+ - submodules/mlx/mlx/backend/cpu/device_info.h
213
+ - submodules/mlx/mlx/backend/cpu/distributed.cpp
214
+ - submodules/mlx/mlx/backend/cpu/eig.cpp
215
+ - submodules/mlx/mlx/backend/cpu/eigh.cpp
216
+ - submodules/mlx/mlx/backend/cpu/encoder.cpp
217
+ - submodules/mlx/mlx/backend/cpu/encoder.h
218
+ - submodules/mlx/mlx/backend/cpu/eval.cpp
219
+ - submodules/mlx/mlx/backend/cpu/eval.h
220
+ - submodules/mlx/mlx/backend/cpu/fft.cpp
221
+ - submodules/mlx/mlx/backend/cpu/gemm.h
222
+ - submodules/mlx/mlx/backend/cpu/gemms/bnns.cpp
223
+ - submodules/mlx/mlx/backend/cpu/gemms/cblas.cpp
224
+ - submodules/mlx/mlx/backend/cpu/gemms/simd_bf16.cpp
225
+ - submodules/mlx/mlx/backend/cpu/gemms/simd_fp16.cpp
226
+ - submodules/mlx/mlx/backend/cpu/gemms/simd_gemm.h
227
+ - submodules/mlx/mlx/backend/cpu/hadamard.cpp
228
+ - submodules/mlx/mlx/backend/cpu/indexing.cpp
229
+ - submodules/mlx/mlx/backend/cpu/inverse.cpp
230
+ - submodules/mlx/mlx/backend/cpu/jit_compiler.cpp
231
+ - submodules/mlx/mlx/backend/cpu/jit_compiler.h
232
+ - submodules/mlx/mlx/backend/cpu/lapack.h
233
+ - submodules/mlx/mlx/backend/cpu/logsumexp.cpp
234
+ - submodules/mlx/mlx/backend/cpu/luf.cpp
235
+ - submodules/mlx/mlx/backend/cpu/make_compiled_preamble.ps1
236
+ - submodules/mlx/mlx/backend/cpu/make_compiled_preamble.sh
237
+ - submodules/mlx/mlx/backend/cpu/masked_mm.cpp
238
+ - submodules/mlx/mlx/backend/cpu/matmul.cpp
239
+ - submodules/mlx/mlx/backend/cpu/primitives.cpp
240
+ - submodules/mlx/mlx/backend/cpu/qrf.cpp
241
+ - submodules/mlx/mlx/backend/cpu/quantized.cpp
242
+ - submodules/mlx/mlx/backend/cpu/reduce.cpp
243
+ - submodules/mlx/mlx/backend/cpu/scan.cpp
244
+ - submodules/mlx/mlx/backend/cpu/select.cpp
245
+ - submodules/mlx/mlx/backend/cpu/simd/accelerate_fp16_simd.h
246
+ - submodules/mlx/mlx/backend/cpu/simd/accelerate_simd.h
247
+ - submodules/mlx/mlx/backend/cpu/simd/base_simd.h
248
+ - submodules/mlx/mlx/backend/cpu/simd/math.h
249
+ - submodules/mlx/mlx/backend/cpu/simd/neon_fp16_simd.h
250
+ - submodules/mlx/mlx/backend/cpu/simd/simd.h
251
+ - submodules/mlx/mlx/backend/cpu/simd/type.h
252
+ - submodules/mlx/mlx/backend/cpu/slicing.h
253
+ - submodules/mlx/mlx/backend/cpu/softmax.cpp
254
+ - submodules/mlx/mlx/backend/cpu/sort.cpp
255
+ - submodules/mlx/mlx/backend/cpu/svd.cpp
256
+ - submodules/mlx/mlx/backend/cpu/ternary.h
257
+ - submodules/mlx/mlx/backend/cpu/threefry.cpp
258
+ - submodules/mlx/mlx/backend/cpu/threefry.h
259
+ - submodules/mlx/mlx/backend/cpu/unary.cpp
260
+ - submodules/mlx/mlx/backend/cpu/unary.h
261
+ - submodules/mlx/mlx/backend/cpu/unary_ops.h
262
+ - submodules/mlx/mlx/backend/cuda/CMakeLists.txt
263
+ - submodules/mlx/mlx/backend/cuda/allocator.cpp
264
+ - submodules/mlx/mlx/backend/cuda/allocator.h
265
+ - submodules/mlx/mlx/backend/cuda/arange.cu
266
+ - submodules/mlx/mlx/backend/cuda/arg_reduce.cu
267
+ - submodules/mlx/mlx/backend/cuda/bin2h.cmake
268
+ - submodules/mlx/mlx/backend/cuda/binary/CMakeLists.txt
269
+ - submodules/mlx/mlx/backend/cuda/binary/add.cu
270
+ - submodules/mlx/mlx/backend/cuda/binary/arctan2.cu
271
+ - submodules/mlx/mlx/backend/cuda/binary/binary.cuh
272
+ - submodules/mlx/mlx/backend/cuda/binary/bitwise_binary.cu
273
+ - submodules/mlx/mlx/backend/cuda/binary/divide.cu
274
+ - submodules/mlx/mlx/backend/cuda/binary/equal.cu
275
+ - submodules/mlx/mlx/backend/cuda/binary/greater.cu
276
+ - submodules/mlx/mlx/backend/cuda/binary/greater_equal.cu
277
+ - submodules/mlx/mlx/backend/cuda/binary/less.cu
278
+ - submodules/mlx/mlx/backend/cuda/binary/less_equal.cu
279
+ - submodules/mlx/mlx/backend/cuda/binary/log_add_exp.cu
280
+ - submodules/mlx/mlx/backend/cuda/binary/logical_and.cu
281
+ - submodules/mlx/mlx/backend/cuda/binary/logical_or.cu
282
+ - submodules/mlx/mlx/backend/cuda/binary/maximum.cu
283
+ - submodules/mlx/mlx/backend/cuda/binary/minimum.cu
284
+ - submodules/mlx/mlx/backend/cuda/binary/multiply.cu
285
+ - submodules/mlx/mlx/backend/cuda/binary/not_equal.cu
286
+ - submodules/mlx/mlx/backend/cuda/binary/power.cu
287
+ - submodules/mlx/mlx/backend/cuda/binary/remainder.cu
288
+ - submodules/mlx/mlx/backend/cuda/binary/subtract.cu
289
+ - submodules/mlx/mlx/backend/cuda/binary_two.cu
290
+ - submodules/mlx/mlx/backend/cuda/compiled.cpp
291
+ - submodules/mlx/mlx/backend/cuda/conv.cpp
292
+ - submodules/mlx/mlx/backend/cuda/conv/conv.h
293
+ - submodules/mlx/mlx/backend/cuda/conv/gemm_conv.cu
294
+ - submodules/mlx/mlx/backend/cuda/conv/gemm_grouped_conv.cu
295
+ - submodules/mlx/mlx/backend/cuda/copy.cu
296
+ - submodules/mlx/mlx/backend/cuda/copy/copy.cuh
297
+ - submodules/mlx/mlx/backend/cuda/copy/copy_contiguous.cu
298
+ - submodules/mlx/mlx/backend/cuda/copy/copy_general.cu
299
+ - submodules/mlx/mlx/backend/cuda/copy/copy_general_dynamic.cu
300
+ - submodules/mlx/mlx/backend/cuda/copy/copy_general_input.cu
301
+ - submodules/mlx/mlx/backend/cuda/cublas_utils.cpp
302
+ - submodules/mlx/mlx/backend/cuda/cublas_utils.h
303
+ - submodules/mlx/mlx/backend/cuda/cuda.h
304
+ - submodules/mlx/mlx/backend/cuda/cuda_utils.h
305
+ - submodules/mlx/mlx/backend/cuda/cudnn_utils.cpp
306
+ - submodules/mlx/mlx/backend/cuda/cudnn_utils.h
307
+ - submodules/mlx/mlx/backend/cuda/custom_kernel.cpp
308
+ - submodules/mlx/mlx/backend/cuda/cutlass_utils.cuh
309
+ - submodules/mlx/mlx/backend/cuda/delayload.cpp
310
+ - submodules/mlx/mlx/backend/cuda/device.cpp
311
+ - submodules/mlx/mlx/backend/cuda/device.h
312
+ - submodules/mlx/mlx/backend/cuda/device/atomic_ops.cuh
313
+ - submodules/mlx/mlx/backend/cuda/device/binary_ops.cuh
314
+ - submodules/mlx/mlx/backend/cuda/device/cast_op.cuh
315
+ - submodules/mlx/mlx/backend/cuda/device/complex.cuh
316
+ - submodules/mlx/mlx/backend/cuda/device/config.h
317
+ - submodules/mlx/mlx/backend/cuda/device/fp16_math.cuh
318
+ - submodules/mlx/mlx/backend/cuda/device/gather.cuh
319
+ - submodules/mlx/mlx/backend/cuda/device/gather_axis.cuh
320
+ - submodules/mlx/mlx/backend/cuda/device/indexing.cuh
321
+ - submodules/mlx/mlx/backend/cuda/device/scatter.cuh
322
+ - submodules/mlx/mlx/backend/cuda/device/scatter_axis.cuh
323
+ - submodules/mlx/mlx/backend/cuda/device/scatter_ops.cuh
324
+ - submodules/mlx/mlx/backend/cuda/device/ternary_ops.cuh
325
+ - submodules/mlx/mlx/backend/cuda/device/unary_ops.cuh
326
+ - submodules/mlx/mlx/backend/cuda/device/utils.cuh
327
+ - submodules/mlx/mlx/backend/cuda/device_info.cpp
328
+ - submodules/mlx/mlx/backend/cuda/distributed.cu
329
+ - submodules/mlx/mlx/backend/cuda/eval.cpp
330
+ - submodules/mlx/mlx/backend/cuda/event.cu
331
+ - submodules/mlx/mlx/backend/cuda/event.h
332
+ - submodules/mlx/mlx/backend/cuda/fence.cpp
333
+ - submodules/mlx/mlx/backend/cuda/gemms/cublas_gemm.cpp
334
+ - submodules/mlx/mlx/backend/cuda/gemms/cublas_gemm.h
335
+ - submodules/mlx/mlx/backend/cuda/gemms/cublas_gemm_batched_12_0.cpp
336
+ - submodules/mlx/mlx/backend/cuda/gemms/cublas_gemm_batched_12_9.cu
337
+ - submodules/mlx/mlx/backend/cuda/gemms/gemv.cu
338
+ - submodules/mlx/mlx/backend/cuda/gemms/gemv.h
339
+ - submodules/mlx/mlx/backend/cuda/gemms/grouped_gemm.h
340
+ - submodules/mlx/mlx/backend/cuda/gemms/grouped_gemm_unaligned.cu
341
+ - submodules/mlx/mlx/backend/cuda/indexing.cpp
342
+ - submodules/mlx/mlx/backend/cuda/jit_module.cpp
343
+ - submodules/mlx/mlx/backend/cuda/jit_module.h
344
+ - submodules/mlx/mlx/backend/cuda/kernel_utils.cu
345
+ - submodules/mlx/mlx/backend/cuda/kernel_utils.cuh
346
+ - submodules/mlx/mlx/backend/cuda/layer_norm.cu
347
+ - submodules/mlx/mlx/backend/cuda/load.cpp
348
+ - submodules/mlx/mlx/backend/cuda/logsumexp.cu
349
+ - submodules/mlx/mlx/backend/cuda/lru_cache.h
350
+ - submodules/mlx/mlx/backend/cuda/matmul.cpp
351
+ - submodules/mlx/mlx/backend/cuda/no_cuda.cpp
352
+ - submodules/mlx/mlx/backend/cuda/primitives.cpp
353
+ - submodules/mlx/mlx/backend/cuda/quantized/affine_quantize.cu
354
+ - submodules/mlx/mlx/backend/cuda/quantized/convert_fp8.cu
355
+ - submodules/mlx/mlx/backend/cuda/quantized/cublas_qqmm.cpp
356
+ - submodules/mlx/mlx/backend/cuda/quantized/cublas_qqmm.h
357
+ - submodules/mlx/mlx/backend/cuda/quantized/cuda_fp4.h
358
+ - submodules/mlx/mlx/backend/cuda/quantized/fp_quantize.cu
359
+ - submodules/mlx/mlx/backend/cuda/quantized/mxfp8_quantize.cuh
360
+ - submodules/mlx/mlx/backend/cuda/quantized/no_qqmm_impl.cpp
361
+ - submodules/mlx/mlx/backend/cuda/quantized/nvfp4_quantize.cuh
362
+ - submodules/mlx/mlx/backend/cuda/quantized/qmv.cu
363
+ - submodules/mlx/mlx/backend/cuda/quantized/qmv.h
364
+ - submodules/mlx/mlx/backend/cuda/quantized/qqmm.cpp
365
+ - submodules/mlx/mlx/backend/cuda/quantized/qqmm_impl.cpp
366
+ - submodules/mlx/mlx/backend/cuda/quantized/qqmm_impl.h
367
+ - submodules/mlx/mlx/backend/cuda/quantized/qqmm_utils.cu
368
+ - submodules/mlx/mlx/backend/cuda/quantized/qqmm_utils.h
369
+ - submodules/mlx/mlx/backend/cuda/quantized/quantized.cpp
370
+ - submodules/mlx/mlx/backend/cuda/quantized/quantized.h
371
+ - submodules/mlx/mlx/backend/cuda/quantized/quantized_utils.cuh
372
+ - submodules/mlx/mlx/backend/cuda/quantized/quantized_utils.h
373
+ - submodules/mlx/mlx/backend/cuda/random.cu
374
+ - submodules/mlx/mlx/backend/cuda/reduce.cu
375
+ - submodules/mlx/mlx/backend/cuda/reduce/all_reduce.cu
376
+ - submodules/mlx/mlx/backend/cuda/reduce/col_reduce.cu
377
+ - submodules/mlx/mlx/backend/cuda/reduce/init_reduce.cu
378
+ - submodules/mlx/mlx/backend/cuda/reduce/reduce.cuh
379
+ - submodules/mlx/mlx/backend/cuda/reduce/reduce_ops.cuh
380
+ - submodules/mlx/mlx/backend/cuda/reduce/reduce_utils.cuh
381
+ - submodules/mlx/mlx/backend/cuda/reduce/row_reduce.cu
382
+ - submodules/mlx/mlx/backend/cuda/rms_norm.cu
383
+ - submodules/mlx/mlx/backend/cuda/rope.cu
384
+ - submodules/mlx/mlx/backend/cuda/scaled_dot_product_attention.cpp
385
+ - submodules/mlx/mlx/backend/cuda/scaled_dot_product_attention.cu
386
+ - submodules/mlx/mlx/backend/cuda/scan.cu
387
+ - submodules/mlx/mlx/backend/cuda/slicing.cpp
388
+ - submodules/mlx/mlx/backend/cuda/softmax.cu
389
+ - submodules/mlx/mlx/backend/cuda/sort.cu
390
+ - submodules/mlx/mlx/backend/cuda/steel/defines.cuh
391
+ - submodules/mlx/mlx/backend/cuda/steel/gemm.cuh
392
+ - submodules/mlx/mlx/backend/cuda/steel/mma.cuh
393
+ - submodules/mlx/mlx/backend/cuda/steel/tiles.cuh
394
+ - submodules/mlx/mlx/backend/cuda/steel/utils.cuh
395
+ - submodules/mlx/mlx/backend/cuda/ternary.cu
396
+ - submodules/mlx/mlx/backend/cuda/unary/CMakeLists.txt
397
+ - submodules/mlx/mlx/backend/cuda/unary/abs.cu
398
+ - submodules/mlx/mlx/backend/cuda/unary/arccos.cu
399
+ - submodules/mlx/mlx/backend/cuda/unary/arccosh.cu
400
+ - submodules/mlx/mlx/backend/cuda/unary/arcsin.cu
401
+ - submodules/mlx/mlx/backend/cuda/unary/arcsinh.cu
402
+ - submodules/mlx/mlx/backend/cuda/unary/arctan.cu
403
+ - submodules/mlx/mlx/backend/cuda/unary/arctanh.cu
404
+ - submodules/mlx/mlx/backend/cuda/unary/bitwise_invert.cu
405
+ - submodules/mlx/mlx/backend/cuda/unary/ceil.cu
406
+ - submodules/mlx/mlx/backend/cuda/unary/conjugate.cu
407
+ - submodules/mlx/mlx/backend/cuda/unary/cos.cu
408
+ - submodules/mlx/mlx/backend/cuda/unary/cosh.cu
409
+ - submodules/mlx/mlx/backend/cuda/unary/erf.cu
410
+ - submodules/mlx/mlx/backend/cuda/unary/erf_inv.cu
411
+ - submodules/mlx/mlx/backend/cuda/unary/exp.cu
412
+ - submodules/mlx/mlx/backend/cuda/unary/expm1.cu
413
+ - submodules/mlx/mlx/backend/cuda/unary/floor.cu
414
+ - submodules/mlx/mlx/backend/cuda/unary/imag.cu
415
+ - submodules/mlx/mlx/backend/cuda/unary/log.cu
416
+ - submodules/mlx/mlx/backend/cuda/unary/log1p.cu
417
+ - submodules/mlx/mlx/backend/cuda/unary/logical_not.cu
418
+ - submodules/mlx/mlx/backend/cuda/unary/negative.cu
419
+ - submodules/mlx/mlx/backend/cuda/unary/real.cu
420
+ - submodules/mlx/mlx/backend/cuda/unary/round.cu
421
+ - submodules/mlx/mlx/backend/cuda/unary/sigmoid.cu
422
+ - submodules/mlx/mlx/backend/cuda/unary/sign.cu
423
+ - submodules/mlx/mlx/backend/cuda/unary/sin.cu
424
+ - submodules/mlx/mlx/backend/cuda/unary/sinh.cu
425
+ - submodules/mlx/mlx/backend/cuda/unary/sqrt.cu
426
+ - submodules/mlx/mlx/backend/cuda/unary/square.cu
427
+ - submodules/mlx/mlx/backend/cuda/unary/tan.cu
428
+ - submodules/mlx/mlx/backend/cuda/unary/tanh.cu
429
+ - submodules/mlx/mlx/backend/cuda/unary/unary.cuh
430
+ - submodules/mlx/mlx/backend/cuda/utils.cpp
431
+ - submodules/mlx/mlx/backend/cuda/utils.h
432
+ - submodules/mlx/mlx/backend/cuda/vector_types.cuh
433
+ - submodules/mlx/mlx/backend/cuda/worker.cpp
434
+ - submodules/mlx/mlx/backend/cuda/worker.h
435
+ - submodules/mlx/mlx/backend/gpu/CMakeLists.txt
436
+ - submodules/mlx/mlx/backend/gpu/copy.cpp
437
+ - submodules/mlx/mlx/backend/gpu/copy.h
438
+ - submodules/mlx/mlx/backend/gpu/device_info.h
439
+ - submodules/mlx/mlx/backend/gpu/eval.h
440
+ - submodules/mlx/mlx/backend/gpu/primitives.cpp
441
+ - submodules/mlx/mlx/backend/gpu/slicing.cpp
442
+ - submodules/mlx/mlx/backend/gpu/slicing.h
443
+ - submodules/mlx/mlx/backend/metal/CMakeLists.txt
444
+ - submodules/mlx/mlx/backend/metal/allocator.cpp
445
+ - submodules/mlx/mlx/backend/metal/allocator.h
446
+ - submodules/mlx/mlx/backend/metal/binary.cpp
447
+ - submodules/mlx/mlx/backend/metal/binary.h
448
+ - submodules/mlx/mlx/backend/metal/compiled.cpp
449
+ - submodules/mlx/mlx/backend/metal/conv.cpp
450
+ - submodules/mlx/mlx/backend/metal/copy.cpp
451
+ - submodules/mlx/mlx/backend/metal/custom_kernel.cpp
452
+ - submodules/mlx/mlx/backend/metal/device.cpp
453
+ - submodules/mlx/mlx/backend/metal/device.h
454
+ - submodules/mlx/mlx/backend/metal/device_info.cpp
455
+ - submodules/mlx/mlx/backend/metal/distributed.cpp
456
+ - submodules/mlx/mlx/backend/metal/eval.cpp
457
+ - submodules/mlx/mlx/backend/metal/event.cpp
458
+ - submodules/mlx/mlx/backend/metal/fence.cpp
459
+ - submodules/mlx/mlx/backend/metal/fft.cpp
460
+ - submodules/mlx/mlx/backend/metal/hadamard.cpp
461
+ - submodules/mlx/mlx/backend/metal/indexing.cpp
462
+ - submodules/mlx/mlx/backend/metal/jit/includes.h
463
+ - submodules/mlx/mlx/backend/metal/jit/indexing.h
464
+ - submodules/mlx/mlx/backend/metal/jit_kernels.cpp
465
+ - submodules/mlx/mlx/backend/metal/kernels.h
466
+ - submodules/mlx/mlx/backend/metal/kernels/CMakeLists.txt
467
+ - submodules/mlx/mlx/backend/metal/kernels/arange.h
468
+ - submodules/mlx/mlx/backend/metal/kernels/arange.metal
469
+ - submodules/mlx/mlx/backend/metal/kernels/arg_reduce.metal
470
+ - submodules/mlx/mlx/backend/metal/kernels/atomic.h
471
+ - submodules/mlx/mlx/backend/metal/kernels/bf16.h
472
+ - submodules/mlx/mlx/backend/metal/kernels/bf16_math.h
473
+ - submodules/mlx/mlx/backend/metal/kernels/binary.h
474
+ - submodules/mlx/mlx/backend/metal/kernels/binary.metal
475
+ - submodules/mlx/mlx/backend/metal/kernels/binary_ops.h
476
+ - submodules/mlx/mlx/backend/metal/kernels/binary_two.h
477
+ - submodules/mlx/mlx/backend/metal/kernels/binary_two.metal
478
+ - submodules/mlx/mlx/backend/metal/kernels/cexpf.h
479
+ - submodules/mlx/mlx/backend/metal/kernels/complex.h
480
+ - submodules/mlx/mlx/backend/metal/kernels/conv.metal
481
+ - submodules/mlx/mlx/backend/metal/kernels/copy.h
482
+ - submodules/mlx/mlx/backend/metal/kernels/copy.metal
483
+ - submodules/mlx/mlx/backend/metal/kernels/defines.h
484
+ - submodules/mlx/mlx/backend/metal/kernels/erf.h
485
+ - submodules/mlx/mlx/backend/metal/kernels/expm1f.h
486
+ - submodules/mlx/mlx/backend/metal/kernels/fence.metal
487
+ - submodules/mlx/mlx/backend/metal/kernels/fft.h
488
+ - submodules/mlx/mlx/backend/metal/kernels/fft.metal
489
+ - submodules/mlx/mlx/backend/metal/kernels/fft/radix.h
490
+ - submodules/mlx/mlx/backend/metal/kernels/fft/readwrite.h
491
+ - submodules/mlx/mlx/backend/metal/kernels/fp4.h
492
+ - submodules/mlx/mlx/backend/metal/kernels/fp8.h
493
+ - submodules/mlx/mlx/backend/metal/kernels/fp_quantized.h
494
+ - submodules/mlx/mlx/backend/metal/kernels/fp_quantized.metal
495
+ - submodules/mlx/mlx/backend/metal/kernels/fp_quantized_nax.h
496
+ - submodules/mlx/mlx/backend/metal/kernels/fp_quantized_nax.metal
497
+ - submodules/mlx/mlx/backend/metal/kernels/gemv.metal
498
+ - submodules/mlx/mlx/backend/metal/kernels/gemv_masked.h
499
+ - submodules/mlx/mlx/backend/metal/kernels/gemv_masked.metal
500
+ - submodules/mlx/mlx/backend/metal/kernels/hadamard.h
501
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/gather.h
502
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/gather_axis.h
503
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/gather_front.h
504
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/indexing.h
505
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/masked_scatter.h
506
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/scatter.h
507
+ - submodules/mlx/mlx/backend/metal/kernels/indexing/scatter_axis.h
508
+ - submodules/mlx/mlx/backend/metal/kernels/layer_norm.metal
509
+ - submodules/mlx/mlx/backend/metal/kernels/logging.h
510
+ - submodules/mlx/mlx/backend/metal/kernels/logsumexp.h
511
+ - submodules/mlx/mlx/backend/metal/kernels/logsumexp.metal
512
+ - submodules/mlx/mlx/backend/metal/kernels/quantized.h
513
+ - submodules/mlx/mlx/backend/metal/kernels/quantized.metal
514
+ - submodules/mlx/mlx/backend/metal/kernels/quantized_nax.h
515
+ - submodules/mlx/mlx/backend/metal/kernels/quantized_nax.metal
516
+ - submodules/mlx/mlx/backend/metal/kernels/quantized_utils.h
517
+ - submodules/mlx/mlx/backend/metal/kernels/random.metal
518
+ - submodules/mlx/mlx/backend/metal/kernels/reduce.h
519
+ - submodules/mlx/mlx/backend/metal/kernels/reduce.metal
520
+ - submodules/mlx/mlx/backend/metal/kernels/reduce_utils.h
521
+ - submodules/mlx/mlx/backend/metal/kernels/reduction/ops.h
522
+ - submodules/mlx/mlx/backend/metal/kernels/reduction/reduce_all.h
523
+ - submodules/mlx/mlx/backend/metal/kernels/reduction/reduce_col.h
524
+ - submodules/mlx/mlx/backend/metal/kernels/reduction/reduce_init.h
525
+ - submodules/mlx/mlx/backend/metal/kernels/reduction/reduce_row.h
526
+ - submodules/mlx/mlx/backend/metal/kernels/rms_norm.metal
527
+ - submodules/mlx/mlx/backend/metal/kernels/rope.metal
528
+ - submodules/mlx/mlx/backend/metal/kernels/scaled_dot_product_attention.metal
529
+ - submodules/mlx/mlx/backend/metal/kernels/scan.h
530
+ - submodules/mlx/mlx/backend/metal/kernels/scan.metal
531
+ - submodules/mlx/mlx/backend/metal/kernels/sdpa_vector.h
532
+ - submodules/mlx/mlx/backend/metal/kernels/softmax.h
533
+ - submodules/mlx/mlx/backend/metal/kernels/softmax.metal
534
+ - submodules/mlx/mlx/backend/metal/kernels/sort.h
535
+ - submodules/mlx/mlx/backend/metal/kernels/sort.metal
536
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/attn.h
537
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.h
538
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention.metal
539
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.h
540
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/kernels/steel_attention_nax.metal
541
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/loader.h
542
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/mma.h
543
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/nax.h
544
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/params.h
545
+ - submodules/mlx/mlx/backend/metal/kernels/steel/attn/transforms.h
546
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/conv.h
547
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.h
548
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv.metal
549
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.h
550
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/kernels/steel_conv_general.metal
551
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/loader.h
552
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_l.h
553
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_n.h
554
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/loaders/loader_general.h
555
+ - submodules/mlx/mlx/backend/metal/kernels/steel/conv/params.h
556
+ - submodules/mlx/mlx/backend/metal/kernels/steel/defines.h
557
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/gemm.h
558
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/gemm_nax.h
559
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.h
560
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused.metal
561
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.h
562
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_fused_nax.metal
563
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.h
564
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather.metal
565
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.h
566
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_gather_nax.metal
567
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.h
568
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_masked.metal
569
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.h
570
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_segmented.metal
571
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.h
572
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk.metal
573
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.h
574
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/kernels/steel_gemm_splitk_nax.metal
575
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/loader.h
576
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/mma.h
577
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/nax.h
578
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/params.h
579
+ - submodules/mlx/mlx/backend/metal/kernels/steel/gemm/transforms.h
580
+ - submodules/mlx/mlx/backend/metal/kernels/steel/utils.h
581
+ - submodules/mlx/mlx/backend/metal/kernels/steel/utils/integral_constant.h
582
+ - submodules/mlx/mlx/backend/metal/kernels/steel/utils/type_traits.h
583
+ - submodules/mlx/mlx/backend/metal/kernels/ternary.h
584
+ - submodules/mlx/mlx/backend/metal/kernels/ternary.metal
585
+ - submodules/mlx/mlx/backend/metal/kernels/ternary_ops.h
586
+ - submodules/mlx/mlx/backend/metal/kernels/unary.h
587
+ - submodules/mlx/mlx/backend/metal/kernels/unary.metal
588
+ - submodules/mlx/mlx/backend/metal/kernels/unary_ops.h
589
+ - submodules/mlx/mlx/backend/metal/kernels/utils.h
590
+ - submodules/mlx/mlx/backend/metal/logsumexp.cpp
591
+ - submodules/mlx/mlx/backend/metal/make_compiled_preamble.sh
592
+ - submodules/mlx/mlx/backend/metal/matmul.cpp
593
+ - submodules/mlx/mlx/backend/metal/matmul.h
594
+ - submodules/mlx/mlx/backend/metal/metal.cpp
595
+ - submodules/mlx/mlx/backend/metal/metal.h
596
+ - submodules/mlx/mlx/backend/metal/no_metal.cpp
597
+ - submodules/mlx/mlx/backend/metal/nojit_kernels.cpp
598
+ - submodules/mlx/mlx/backend/metal/normalization.cpp
599
+ - submodules/mlx/mlx/backend/metal/primitives.cpp
600
+ - submodules/mlx/mlx/backend/metal/quantized.cpp
601
+ - submodules/mlx/mlx/backend/metal/reduce.cpp
602
+ - submodules/mlx/mlx/backend/metal/reduce.h
603
+ - submodules/mlx/mlx/backend/metal/resident.cpp
604
+ - submodules/mlx/mlx/backend/metal/resident.h
605
+ - submodules/mlx/mlx/backend/metal/rope.cpp
606
+ - submodules/mlx/mlx/backend/metal/scaled_dot_product_attention.cpp
607
+ - submodules/mlx/mlx/backend/metal/scan.cpp
608
+ - submodules/mlx/mlx/backend/metal/scan.h
609
+ - submodules/mlx/mlx/backend/metal/slicing.cpp
610
+ - submodules/mlx/mlx/backend/metal/softmax.cpp
611
+ - submodules/mlx/mlx/backend/metal/sort.cpp
612
+ - submodules/mlx/mlx/backend/metal/ternary.cpp
613
+ - submodules/mlx/mlx/backend/metal/ternary.h
614
+ - submodules/mlx/mlx/backend/metal/unary.cpp
615
+ - submodules/mlx/mlx/backend/metal/unary.h
616
+ - submodules/mlx/mlx/backend/metal/utils.cpp
617
+ - submodules/mlx/mlx/backend/metal/utils.h
618
+ - submodules/mlx/mlx/backend/no_cpu/CMakeLists.txt
619
+ - submodules/mlx/mlx/backend/no_cpu/compiled.cpp
620
+ - submodules/mlx/mlx/backend/no_cpu/device_info.cpp
621
+ - submodules/mlx/mlx/backend/no_cpu/primitives.cpp
622
+ - submodules/mlx/mlx/backend/no_gpu/CMakeLists.txt
623
+ - submodules/mlx/mlx/backend/no_gpu/allocator.cpp
624
+ - submodules/mlx/mlx/backend/no_gpu/apple_memory.h
625
+ - submodules/mlx/mlx/backend/no_gpu/device_info.cpp
626
+ - submodules/mlx/mlx/backend/no_gpu/eval.cpp
627
+ - submodules/mlx/mlx/backend/no_gpu/event.cpp
628
+ - submodules/mlx/mlx/backend/no_gpu/fence.cpp
629
+ - submodules/mlx/mlx/backend/no_gpu/linux_memory.h
630
+ - submodules/mlx/mlx/backend/no_gpu/primitives.cpp
631
+ - submodules/mlx/mlx/compile.cpp
632
+ - submodules/mlx/mlx/compile.h
633
+ - submodules/mlx/mlx/compile_impl.h
634
+ - submodules/mlx/mlx/device.cpp
635
+ - submodules/mlx/mlx/device.h
636
+ - submodules/mlx/mlx/distributed/CMakeLists.txt
637
+ - submodules/mlx/mlx/distributed/distributed.cpp
638
+ - submodules/mlx/mlx/distributed/distributed.h
639
+ - submodules/mlx/mlx/distributed/distributed_impl.h
640
+ - submodules/mlx/mlx/distributed/jaccl/CMakeLists.txt
641
+ - submodules/mlx/mlx/distributed/jaccl/jaccl.cpp
642
+ - submodules/mlx/mlx/distributed/jaccl/jaccl.h
643
+ - submodules/mlx/mlx/distributed/jaccl/mesh.cpp
644
+ - submodules/mlx/mlx/distributed/jaccl/mesh.h
645
+ - submodules/mlx/mlx/distributed/jaccl/no_jaccl.cpp
646
+ - submodules/mlx/mlx/distributed/jaccl/ring.cpp
647
+ - submodules/mlx/mlx/distributed/jaccl/ring.h
648
+ - submodules/mlx/mlx/distributed/jaccl/utils.cpp
649
+ - submodules/mlx/mlx/distributed/jaccl/utils.h
650
+ - submodules/mlx/mlx/distributed/mpi/CMakeLists.txt
651
+ - submodules/mlx/mlx/distributed/mpi/mpi.cpp
652
+ - submodules/mlx/mlx/distributed/mpi/mpi.h
653
+ - submodules/mlx/mlx/distributed/mpi/mpi_declarations.h
654
+ - submodules/mlx/mlx/distributed/mpi/no_mpi.cpp
655
+ - submodules/mlx/mlx/distributed/nccl/CMakeLists.txt
656
+ - submodules/mlx/mlx/distributed/nccl/nccl.cpp
657
+ - submodules/mlx/mlx/distributed/nccl/nccl.h
658
+ - submodules/mlx/mlx/distributed/nccl/nccl_stub/CMakeLists.txt
659
+ - submodules/mlx/mlx/distributed/nccl/nccl_stub/nccl_stubs.cpp
660
+ - submodules/mlx/mlx/distributed/nccl/no_nccl.cpp
661
+ - submodules/mlx/mlx/distributed/ops.cpp
662
+ - submodules/mlx/mlx/distributed/ops.h
663
+ - submodules/mlx/mlx/distributed/primitives.cpp
664
+ - submodules/mlx/mlx/distributed/primitives.h
665
+ - submodules/mlx/mlx/distributed/reduction_ops.h
666
+ - submodules/mlx/mlx/distributed/ring/CMakeLists.txt
667
+ - submodules/mlx/mlx/distributed/ring/no_ring.cpp
668
+ - submodules/mlx/mlx/distributed/ring/ring.cpp
669
+ - submodules/mlx/mlx/distributed/ring/ring.h
670
+ - submodules/mlx/mlx/distributed/utils.cpp
671
+ - submodules/mlx/mlx/distributed/utils.h
672
+ - submodules/mlx/mlx/dtype.cpp
673
+ - submodules/mlx/mlx/dtype.h
674
+ - submodules/mlx/mlx/dtype_utils.cpp
675
+ - submodules/mlx/mlx/dtype_utils.h
676
+ - submodules/mlx/mlx/einsum.cpp
677
+ - submodules/mlx/mlx/einsum.h
678
+ - submodules/mlx/mlx/event.h
679
+ - submodules/mlx/mlx/export.cpp
680
+ - submodules/mlx/mlx/export.h
681
+ - submodules/mlx/mlx/export_impl.h
682
+ - submodules/mlx/mlx/fast.cpp
683
+ - submodules/mlx/mlx/fast.h
684
+ - submodules/mlx/mlx/fast_primitives.h
685
+ - submodules/mlx/mlx/fence.h
686
+ - submodules/mlx/mlx/fft.cpp
687
+ - submodules/mlx/mlx/fft.h
688
+ - submodules/mlx/mlx/graph_utils.cpp
689
+ - submodules/mlx/mlx/graph_utils.h
690
+ - submodules/mlx/mlx/io.h
691
+ - submodules/mlx/mlx/io/CMakeLists.txt
692
+ - submodules/mlx/mlx/io/gguf.cpp
693
+ - submodules/mlx/mlx/io/gguf.h
694
+ - submodules/mlx/mlx/io/gguf_quants.cpp
695
+ - submodules/mlx/mlx/io/load.cpp
696
+ - submodules/mlx/mlx/io/load.h
697
+ - submodules/mlx/mlx/io/no_gguf.cpp
698
+ - submodules/mlx/mlx/io/no_safetensors.cpp
699
+ - submodules/mlx/mlx/io/safetensors.cpp
700
+ - submodules/mlx/mlx/linalg.cpp
701
+ - submodules/mlx/mlx/linalg.h
702
+ - submodules/mlx/mlx/memory.h
703
+ - submodules/mlx/mlx/mlx.h
704
+ - submodules/mlx/mlx/ops.cpp
705
+ - submodules/mlx/mlx/ops.h
706
+ - submodules/mlx/mlx/primitives.cpp
707
+ - submodules/mlx/mlx/primitives.h
708
+ - submodules/mlx/mlx/random.cpp
709
+ - submodules/mlx/mlx/random.h
710
+ - submodules/mlx/mlx/scheduler.cpp
711
+ - submodules/mlx/mlx/scheduler.h
712
+ - submodules/mlx/mlx/small_vector.h
713
+ - submodules/mlx/mlx/stream.h
714
+ - submodules/mlx/mlx/threadpool.h
715
+ - submodules/mlx/mlx/transforms.cpp
716
+ - submodules/mlx/mlx/transforms.h
717
+ - submodules/mlx/mlx/transforms_impl.h
718
+ - submodules/mlx/mlx/types/bf16.h
719
+ - submodules/mlx/mlx/types/complex.h
720
+ - submodules/mlx/mlx/types/fp16.h
721
+ - submodules/mlx/mlx/types/half_types.h
722
+ - submodules/mlx/mlx/types/limits.h
723
+ - submodules/mlx/mlx/utils.cpp
724
+ - submodules/mlx/mlx/utils.h
725
+ - submodules/mlx/mlx/version.cpp
726
+ - submodules/mlx/mlx/version.h
629
727
  homepage: https://github.com/skryl/mlx-ruby
630
728
  licenses:
631
729
  - MIT
@@ -640,14 +738,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
640
738
  requirements:
641
739
  - - ">="
642
740
  - !ruby/object:Gem::Version
643
- version: '3.1'
741
+ version: '3.3'
644
742
  required_rubygems_version: !ruby/object:Gem::Requirement
645
743
  requirements:
646
744
  - - ">="
647
745
  - !ruby/object:Gem::Version
648
746
  version: '0'
649
747
  requirements: []
650
- rubygems_version: 3.6.2
748
+ rubygems_version: 4.0.3
651
749
  specification_version: 4
652
750
  summary: Ruby bindings for the native MLX library
653
751
  test_files: []