RubyGems - mlx - Versions diffs - 0.30.7.3 → 0.30.7.6 - Mend

mlx 0.30.7.3 → 0.30.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (590)

data/{mlx → submodules/mlx}/mlx/ops.cpp RENAMED Viewed

@@ -10,6 +10,7 @@
 #include <sstream>
 #include "mlx/backend/cuda/cuda.h"
+#include "mlx/backend/metal/metal.h"
 #include "mlx/fast_primitives.h"
 #include "mlx/ops.h"
 #include "mlx/primitives.h"
@@ -2311,6 +2312,40 @@ array argmax(
   return out;
 }
+array hanning(int M, StreamOrDevice s /* = {} */) { // Hanning window of size M, float32
+  if (M < 1) { // non-positive size: skip the computation below
+    return array({});
+  }
+  if (M == 1) { // single point; also avoids dividing by (M - 1) == 0 below
+    return ones({1}, float32, s);
+  }
+  auto n = arange(0, M, float32, s); // sample indices 0 .. M-1
+  array factor(M_PI / (M - 1), float32); // pi / (M - 1)
+  return square(sin(multiply(factor, n, s), s), s); // w[n] = sin^2(pi * n / (M - 1))
+}
+array hamming(int M, StreamOrDevice s /* = {} */) { // Hamming window of size M, float32
+  if (M < 1) { // non-positive size: skip the computation below
+    return array({});
+  }
+  if (M == 1) { // single point; also avoids dividing by (M - 1) == 0 below
+    return ones({1}, float32, s);
+  }
+  auto n = arange(0, M, float32, s); // sample indices 0 .. M-1
+  float factor_val = (2.0 * M_PI) / (M - 1); // computed in double, then narrowed to float
+  auto factor = array(factor_val, float32);
+  auto arg = multiply(factor, n, s); // 2 * pi * n / (M - 1)
+  auto cos_vals = cos(arg, s);
+  auto left_coef = array(0.54f, float32);
+  auto right_coef = array(0.46f, float32);
+  return subtract(left_coef, multiply(right_coef, cos_vals, s), s); // w[n] = 0.54 - 0.46 * cos(arg)
+}
 /** Returns a sorted copy of the flattened array. */
 array sort(const array& a, StreamOrDevice s /* = {} */) {
   int size = a.size();
@@ -4209,6 +4244,34 @@ std::pair<Dtype, QuantizationMode> validate_mode_with_type(
   }
 }
+void validate_global_scale( // throws std::invalid_argument on a bad global scale; no-op when absent
+    std::string_view tag, // caller name, printed in brackets at the start of each message
+    QuantizationMode qmode,
+    const std::optional<array>& global_scale) {
+  if (global_scale.has_value()) {
+    if (qmode != QuantizationMode::Nvfp4) { // a global scale is rejected for every other mode
+      std::ostringstream msg;
+      msg << "[" << tag << "] Global scale is only supported for 'nvfp4' "
+          << "quantization mode.";
+      throw std::invalid_argument(msg.str());
+    } else {
+      if (global_scale->size() != 1) { // "scalar" is checked as exactly one element
+        std::ostringstream msg;
+        msg << "[" << tag << "] Global scale must be a scalar but got shape "
+            << global_scale->shape() << ".";
+        throw std::invalid_argument(msg.str());
+      }
+      // TODO: not sure if type should be restricted to float32
+      if (global_scale->dtype() != float32) {
+        std::ostringstream msg;
+        msg << "[" << tag << "] Global scale must have dtype float32 but got "
+            << global_scale->dtype() << ".";
+        throw std::invalid_argument(msg.str());
+      }
+    }
+  }
+}
 array quantized_matmul(
     array x,
     array w,
@@ -4251,7 +4314,6 @@ array quantized_matmul(
   if (x.ndim() > 2 && w.ndim() > 2) {
     inputs = broadcast_arrays(inputs, {-2, -1}, s);
   }
   auto out_shape = inputs[0].shape();
   out_shape.back() = w_outer_dims;
   return array(
@@ -4267,7 +4329,10 @@ void validate_qqmm_inputs(
     array w,
     std::optional<array> scales_w,
     int group_size,
-    int bits) {
+    int bits,
+    std::optional<array> global_scale_x,
+    std::optional<array> global_scale_w,
+    QuantizationMode qmode) {
   // check 2D (for now)
   if (x.ndim() > 2 || w.ndim() > 2) {
     std::ostringstream msg;
@@ -4304,6 +4369,19 @@ void validate_qqmm_inputs(
         << "first argument dtype == " << x.dtype() << ".";
     throw std::invalid_argument(msg.str());
   }
+  // validate global scales
+  validate_global_scale("qqmm", qmode, global_scale_x);
+  validate_global_scale("qqmm", qmode, global_scale_w);
+  // For nvfp4 mode, both global scales must be provided together or neither
+  if (qmode == QuantizationMode::Nvfp4) {
+    bool has_x = global_scale_x.has_value();
+    bool has_w = global_scale_w.has_value();
+    if (has_x != has_w) {
+      throw std::invalid_argument(
+          "[qqmm] For nvfp4 mode, either both global_scale_x and "
+          "global_scale_w must be provided, or neither.");
+    }
+  }
 }
 std::pair<int, int> extract_qqmm_dims(
@@ -4343,6 +4421,8 @@ array qqmm(
     std::optional<int> group_size_ /* = std::nullopt */,
     std::optional<int> bits_ /* = std::nullopt */,
     const std::string& mode /* = "nvfp4" */,
+    const std::optional<array> global_scale_x /* = std::nullopt */,
+    const std::optional<array> global_scale_w /* = std::nullopt */,
     StreamOrDevice s /* = {} */) {
   auto stream = to_stream(s);
   auto qmode = string_to_quantization_mode(mode, "qqmm");
@@ -4369,7 +4449,8 @@ array qqmm(
   }
   // validate inputs
-  validate_qqmm_inputs(x, w, scales_w, group_size, bits);
+  validate_qqmm_inputs(
+      x, w, scales_w, group_size, bits, global_scale_x, global_scale_w, qmode);
   // validate and extract shapes
   auto [w_inner_dims, w_outer_dims] =
       extract_qqmm_dims(x, w, scales_w, group_size, bits);
@@ -4380,6 +4461,11 @@ array qqmm(
   if (scales_w.has_value()) {
     inputs.push_back(*scales_w);
   }
+  if (global_scale_x.has_value() && global_scale_w.has_value()) {
+    inputs.push_back(*global_scale_x);
+    inputs.push_back(*global_scale_w);
+  }
   auto out_shape = inputs[0].shape();
   out_shape.back() = w_outer_dims;
   auto out = array(
@@ -4515,6 +4601,7 @@ std::vector<array> fp_quantize(
     int group_size,
     int bits,
     QuantizationMode mode,
+    const std::optional<array>& global_scale /* = std::nullopt */,
     Stream s) {
   int expected_gs = mode == QuantizationMode::Nvfp4 ? 16 : 32;
   int expected_bits = mode == QuantizationMode::Mxfp8 ? 8 : 4;
@@ -4532,6 +4619,12 @@ std::vector<array> fp_quantize(
         << bits << ".";
     throw std::invalid_argument(msg.str());
   }
+  auto inputs = std::vector<array>{w};
+  if (global_scale.has_value()) {
+    inputs.push_back(global_scale.value());
+  }
   auto fallback = [bits = bits, group_size = group_size, s](
                       const std::vector<array>& inputs) -> std::vector<array> {
     auto& w = inputs[0];
@@ -4543,8 +4636,13 @@ std::vector<array> fp_quantize(
         divide(max(abs(wq, s), -1, true, s), array(maxval, w.dtype()), s);
     if (group_size == 16) {
       // convert to e4m3
+      auto scale_encode = inputs.size() > 1
+          ? divide(array(448.0f * 6.0f, float32), inputs[1], s)
+          : array(1.0f, float32);
+      scales = multiply(scales, scale_encode, s);
       scales = to_fp8(scales, s);
-      wq = divide(wq, from_fp8(scales, w.dtype(), s), s);
+      wq = multiply(
+          divide(wq, from_fp8(scales, w.dtype(), s), s), scale_encode, s);
     } else {
       // convert to e8m0
       auto z = array(0, scales.dtype());
@@ -4600,9 +4698,9 @@ std::vector<array> fp_quantize(
         {uint32, uint8},
         std::make_shared<fast::Quantize>(
             s, fallback, group_size, bits, mode, false),
-        {w});
+        inputs);
   }
-  return fallback({w});
+  return fallback(inputs);
 }
 std::vector<array> quantize(
@@ -4610,6 +4708,7 @@ std::vector<array> quantize(
     std::optional<int> group_size_ /* = std::nullopt */,
     std::optional<int> bits_ /* = std::nullopt */,
     const std::string& mode /* = "affine" */,
+    const std::optional<array>& global_scale /* = std::nullopt */,
     StreamOrDevice s /* = {} */) {
   auto qmode = string_to_quantization_mode(mode, "quantize");
   auto [group_size, bits] =
@@ -4636,11 +4735,17 @@ std::vector<array> quantize(
         << " matrix has shape " << w.shape();
     throw std::invalid_argument(msg.str());
   }
+  if (to_stream(s).device == Device::gpu && metal::is_available() &&
+      global_scale.has_value()) {
+    std::ostringstream msg;
+    msg << "[quantize] Global scale is not supported on the Metal backend.";
+    throw std::invalid_argument(msg.str());
+  }
+  validate_global_scale("quantize", qmode, global_scale);
   if (qmode == QuantizationMode::Affine) {
     return affine_quantize(w, group_size, bits, s);
   } else {
-    return fp_quantize(w, group_size, bits, qmode, to_stream(s));
+    return fp_quantize(w, group_size, bits, qmode, global_scale, to_stream(s));
   }
 }
@@ -4745,6 +4850,7 @@ array fp_dequantize(
     int bits,
     Dtype out_type,
     QuantizationMode mode,
+    const std::optional<array>& global_scale /* = std::nullopt */,
     Stream s) {
   int expected_gs = mode == QuantizationMode::Nvfp4 ? 16 : 32;
   int expected_bits = mode == QuantizationMode::Mxfp8 ? 8 : 4;
@@ -4789,6 +4895,11 @@ array fp_dequantize(
     throw std::invalid_argument(msg.str());
   }
+  auto inputs = std::vector<array>{w, scales};
+  if (global_scale.has_value()) {
+    inputs.push_back(global_scale.value());
+  }
   auto fallback =
       [wshape = std::move(wshape),
        sshape = std::move(sshape),
@@ -4831,13 +4942,17 @@ array fp_dequantize(
     out = reshape(out, {-1, group_size}, s);
     scales = reshape(scales, {-1, 1}, s);
     if (group_size == 16) {
-      scales = from_fp8(scales, out_type, s);
+      array inv_scale_enc = inputs.size() > 2
+          ? divide(inputs[2], array(448.0f * 6.0f, out_type), s)
+          : array(1.0f, out_type);
+      scales = multiply(from_fp8(scales, out_type, s), inv_scale_enc, s);
     } else {
       scales = subtract(astype(scales, out_type, s), array(127, out_type), s);
       scales = power(array(2.0f, out_type), scales, s);
     }
     return {reshape(multiply(out, scales, s), wshape, s)};
   };
   if (s.device == Device::gpu) {
     auto out_shape = w.shape();
     out_shape.back() = out_size;
@@ -4846,9 +4961,9 @@ array fp_dequantize(
         out_type,
         std::make_shared<fast::Quantize>(
             s, fallback, group_size, bits, mode, true),
-        {w, scales});
+        inputs);
   }
-  return fallback({w, scales})[0];
+  return fallback(inputs)[0];
 }
 array dequantize(
@@ -4858,6 +4973,7 @@ array dequantize(
     std::optional<int> group_size_ /* = std::nullopt */,
     std::optional<int> bits_ /* = std::nullopt */,
     const std::string& mode /* = "affine" */,
+    const std::optional<array>& global_scale /* = std::nullopt */,
     std::optional<Dtype> dtype /* = std::nullopt */,
     StreamOrDevice s /* = {} */) {
   auto [out_type, qmode] =
@@ -4884,6 +5000,14 @@ array dequantize(
         << "but it has only " << w.ndim() << ".";
     throw std::invalid_argument(msg.str());
   }
+  if (global_scale.has_value()) {
+    if (to_stream(s).device == Device::gpu && metal::is_available()) {
+      std::ostringstream msg;
+      msg << "[dequantize] Global scale is not supported on the Metal backend.";
+      throw std::invalid_argument(msg.str());
+    }
+  }
+  validate_global_scale("dequantize", qmode, global_scale);
   if (qmode == QuantizationMode::Affine) {
     return astype(
@@ -4892,7 +5016,14 @@ array dequantize(
         s);
   } else {
     return fp_dequantize(
-        w, scales, group_size, bits, out_type, qmode, to_stream(s));
+        w,
+        scales,
+        group_size,
+        bits,
+        out_type,
+        qmode,
+        global_scale,
+        to_stream(s));
   }
 }
@@ -6091,4 +6222,4 @@ array contiguous(
       {a});
 }
-} // namespace mlx::core
+} // namespace mlx::core

data/{mlx → submodules/mlx}/mlx/ops.h RENAMED Viewed

@@ -666,6 +666,12 @@ min(const array& a,
 MLX_API array
 min(const array& a, int axis, bool keepdims = false, StreamOrDevice s = {});
+/** Returns the Hanning window of size M. */
+MLX_API array hanning(int M, StreamOrDevice s = {});
+/** Returns the Hamming window of size M. */
+MLX_API array hamming(int M, StreamOrDevice s = {});
 /** Returns the index of the minimum value in the array. */
 MLX_API array argmin(const array& a, bool keepdims, StreamOrDevice s = {});
 inline array argmin(const array& a, StreamOrDevice s = {}) {
@@ -1391,6 +1397,7 @@ MLX_API std::vector<array> quantize(
     std::optional<int> group_size = std::nullopt,
     std::optional<int> bits = std::nullopt,
     const std::string& mode = "affine",
+    const std::optional<array>& global_scale = std::nullopt,
     StreamOrDevice s = {});
 /** Dequantize a matrix produced by quantize() */
@@ -1401,17 +1408,20 @@ MLX_API array dequantize(
     std::optional<int> group_size = std::nullopt,
     std::optional<int> bits = std::nullopt,
     const std::string& mode = "affine",
+    const std::optional<array>& global_scale = std::nullopt,
     std::optional<Dtype> dtype = std::nullopt,
     StreamOrDevice s = {});
 MLX_API array qqmm(
     array x, // input activations
     array w, // maybe quantized weights
-    std::optional<array> w_scales = std::nullopt, // optional scales if w is
-                                                  // quantized
+    const std::optional<array> w_scales = std::nullopt, // optional scales if w
+                                                        // is quantized
     std::optional<int> group_size = std::nullopt,
     std::optional<int> bits = std::nullopt,
     const std::string& mode = "nvfp4",
+    const std::optional<array> global_scale_x = std::nullopt,
+    const std::optional<array> global_scale_w = std::nullopt,
     StreamOrDevice s = {});
 /** Convert an E4M3 float8 to the given floating point dtype. */

data/{mlx → submodules/mlx}/mlx/primitives.cpp RENAMED Viewed

@@ -3424,6 +3424,7 @@ std::vector<array> QuantizedMatmul::vjp(
             group_size_,
             bits_,
             quantization_mode_to_string(mode_),
+            {}, // placeholder for amax
             std::nullopt,
             stream());
         wq = unflatten(wq, -1, {-1, group_size_}, stream());
@@ -3484,14 +3485,14 @@ std::vector<Shape> QQMatmul::output_shapes(const std::vector<array>& inputs) {
 }
 std::vector<array> QQMatmul::vjp(
-    const std::vector<array>& primals, // non quantized x, non quantized w
+    const std::vector<array>& primals, // non quantized x, non quantized w, if
+                                       // nvfp4 global_scale_x, global_scale_w
     const std::vector<array>& cotangents, // non quantized upstream grads
     const std::vector<int>& argnums,
     const std::vector<array>&) {
-  if (primals.size() != 2) {
-    throw std::runtime_error(
-        "[QQMatmul::vjp] Expected exactly 2 non-quantized primal inputs (x, w).");
-  }
+  bool is_nvfp4 = mode_ == QuantizationMode::Nvfp4;
+  assert(primals.size() == 2 || (is_nvfp4 && primals.size() == 4));
   std::vector<array> vjps;
   auto& cotan = cotangents[0];
   auto& s = stream();
@@ -3499,6 +3500,15 @@ std::vector<array> QQMatmul::vjp(
   // primal[0] -- non quantized activations (M, K)
   // cotan -- non quantized grads (M, N)
   auto qmode = quantization_mode_to_string(mode_);
+  std::optional<array> cotan_amax = (primals.size() == 4)
+      ? std::make_optional(astype(max(abs(cotan, s), s), float32, s))
+      : std::nullopt;
+  auto get_primal_scale = [&](int idx) {
+    return (primals.size() == 4) ? std::make_optional(primals[idx])
+                                 : std::nullopt;
+  };
   for (auto arg : argnums) {
     if (arg == 0) { // gradient wrt to x
       // We transpose weights -> quantize along N
@@ -3509,6 +3519,8 @@ std::vector<array> QQMatmul::vjp(
           group_size_,
           bits_,
           qmode,
+          cotan_amax,
+          get_primal_scale(3), // global_scale_w (for w.T)
           s));
     } else if (arg == 1) { // gradient wrt to weights
       vjps.push_back(qqmm(
@@ -3518,7 +3530,11 @@ std::vector<array> QQMatmul::vjp(
           group_size_,
           bits_,
           qmode,
+          cotan_amax,
+          get_primal_scale(2), // global_scale_x (for x.T)
           s));
+    } else {
+      vjps.push_back(zeros_like(primals[arg], s));
     }
   }
   return vjps;
@@ -3643,6 +3659,7 @@ std::vector<array> GatherQMM::vjp(
                             bits_,
                             quantization_mode_to_string(mode_),
                             std::nullopt,
+                            std::nullopt, // amax placeholder
                             stream()),
                         -1,
                         {-1, group_size_},

data/{mlx → submodules/mlx}/mlx/scheduler.cpp RENAMED Viewed

@@ -26,6 +26,10 @@ Stream get_stream(int index) {
   return scheduler::scheduler().get_stream(index);
 }
+std::vector<Stream> get_streams() { // free-function wrapper over the scheduler's stream list
+  return scheduler::scheduler().get_streams(); // forwards to Scheduler::get_streams()
+}
 Stream new_stream(Device d) {
   if (!gpu::is_available() && d == Device::gpu) {
     throw std::invalid_argument(

data/{mlx → submodules/mlx}/mlx/scheduler.h RENAMED Viewed

@@ -99,6 +99,9 @@ class Scheduler {
   Stream get_stream(int index) const {
     return streams_.at(index);
   }
+  std::vector<Stream> get_streams() const { // returns by value, so callers get a copy
+    return streams_; // same container that get_stream(index) indexes into
+  }
   void set_default_stream(const Stream& s) {
     default_streams_.at(s.device.type) = s;

data/{mlx → submodules/mlx}/mlx/stream.h RENAMED Viewed

@@ -2,6 +2,8 @@
 #pragma once
+#include <vector>
 #include "mlx/api.h"
 #include "mlx/device.h"
@@ -25,6 +27,9 @@ MLX_API Stream new_stream(Device d);
 /** Get the stream with the given index. */
 MLX_API Stream get_stream(int index);
+/** Get all available streams. */
+MLX_API std::vector<Stream> get_streams();
 inline bool operator==(const Stream& lhs, const Stream& rhs) {
   return lhs.index == rhs.index;
 }

data/submodules/mlx-onnx/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,159 @@
+cmake_minimum_required(VERSION 3.25)
+project(mlx_onnx VERSION 0.30.7.1 LANGUAGES C CXX)
+set(CMAKE_CXX_STANDARD 20) # C++20, enforced by the REQUIRED flag on the next line
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON) # build all targets as PIC
+option(MLX_ONNX_USE_EXTERNAL_MLX "Build against an externally provided MLX install" OFF)
+option(MLX_ONNX_BUILD_PYTHON_BINDINGS "Build Python IR bindings" OFF)
+option(MLX_ONNX_INSTALL_CPP_ARTIFACTS "Install C++ library and headers" ON)
+include(FetchContent) # used further down to fetch nlohmann_json when no target exists
+if(MLX_ONNX_USE_EXTERNAL_MLX AND MLX_ONNX_BUILD_PYTHON_BINDINGS) # the two options are mutually exclusive
+  message(
+    FATAL_ERROR
+      "MLX_ONNX_BUILD_PYTHON_BINDINGS requires bundled mlx sources; set MLX_ONNX_USE_EXTERNAL_MLX=OFF")
+endif()
+if(MLX_ONNX_USE_EXTERNAL_MLX) # external mode: wrap a prebuilt libmlx in an imported target
+  set(MLX_ONNX_EXTERNAL_MLX_INCLUDE_DIR "" CACHE PATH "Path to MLX include root")
+  set(MLX_ONNX_EXTERNAL_MLX_LIB_DIR "" CACHE PATH "Path to MLX library directory")
+  if(MLX_ONNX_EXTERNAL_MLX_INCLUDE_DIR STREQUAL "") # both paths are mandatory in external mode
+    message(FATAL_ERROR "MLX_ONNX_EXTERNAL_MLX_INCLUDE_DIR must be set when MLX_ONNX_USE_EXTERNAL_MLX=ON")
+  endif()
+  if(MLX_ONNX_EXTERNAL_MLX_LIB_DIR STREQUAL "")
+    message(FATAL_ERROR "MLX_ONNX_EXTERNAL_MLX_LIB_DIR must be set when MLX_ONNX_USE_EXTERNAL_MLX=ON")
+  endif()
+  find_library(
+    MLX_EXTERNAL_LIBRARY
+    NAMES mlx
+    PATHS ${MLX_ONNX_EXTERNAL_MLX_LIB_DIR}
+    NO_DEFAULT_PATH) # search only the user-supplied directory
+  if(NOT MLX_EXTERNAL_LIBRARY)
+    message(FATAL_ERROR "Could not find libmlx in ${MLX_ONNX_EXTERNAL_MLX_LIB_DIR}")
+  endif()
+  add_library(mlx SHARED IMPORTED GLOBAL) # NOTE(review): declared SHARED, but find_library may also match a static libmlx - confirm
+  set_target_properties(
+    mlx
+    PROPERTIES
+      IMPORTED_LOCATION ${MLX_EXTERNAL_LIBRARY}
+      INTERFACE_INCLUDE_DIRECTORIES ${MLX_ONNX_EXTERNAL_MLX_INCLUDE_DIR})
+else() # bundled mode: build MLX from the ./mlx subdirectory with optional components forced off
+  set(MLX_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+  set(MLX_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+  set(MLX_BUILD_BENCHMARKS OFF CACHE BOOL "" FORCE)
+  if(MLX_ONNX_BUILD_PYTHON_BINDINGS) # MLX's own Python bindings follow this project's option
+    set(MLX_BUILD_PYTHON_BINDINGS ON CACHE BOOL "" FORCE)
+  else()
+    set(MLX_BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE)
+  endif()
+  set(MLX_BUILD_PYTHON_STUBS OFF CACHE BOOL "" FORCE)
+  set(MLX_BUILD_GGUF OFF CACHE BOOL "" FORCE)
+  set(MLX_BUILD_SAFETENSORS OFF CACHE BOOL "" FORCE)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/mlx)
+endif()
+if(NOT TARGET nlohmann_json::nlohmann_json) # reuse the target if a parent project already provides it
+  FetchContent_Declare(
+    nlohmann_json
+    GIT_REPOSITORY https://github.com/nlohmann/json.git
+    GIT_TAG v3.11.3
+    EXCLUDE_FROM_ALL) # NOTE(review): FetchContent_Declare documents EXCLUDE_FROM_ALL as new in CMake 3.28, but the minimum above is 3.25 - confirm
+  FetchContent_MakeAvailable(nlohmann_json)
+endif()
+add_library( # no STATIC/SHARED keyword, so the library type follows BUILD_SHARED_LIBS
+  mlx_onnx
+  src/export.cpp
+  src/api.cpp
+  src/compat.cpp
+  src/io.cpp
+  src/lowering.cpp
+  src/mappings.cpp
+  src/onnx.cpp
+  src/shared.cpp)
+set_target_properties(mlx_onnx PROPERTIES OUTPUT_NAME mlx_onnx)
+target_include_directories(
+  mlx_onnx
+  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+         $<INSTALL_INTERFACE:include>
+  PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) # src/ headers are internal to this target
+if(MLX_ONNX_USE_EXTERNAL_MLX) # external MLX headers are added here as a PRIVATE include path
+  target_include_directories(mlx_onnx PRIVATE ${MLX_ONNX_EXTERNAL_MLX_INCLUDE_DIR})
+endif()
+target_link_libraries(mlx_onnx PUBLIC mlx nlohmann_json::nlohmann_json) # PUBLIC: both propagate to consumers
+if(MLX_ONNX_BUILD_PYTHON_BINDINGS) # everything below installs into the `python` component
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/python/src)
+  set(MLX_ONNX_PY_INIT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/python/mlx_onnx/__init__.py)
+  if(NOT EXISTS ${MLX_ONNX_PY_INIT_FILE}) # no checked-in __init__.py: generate a minimal one in the build dir
+    set(MLX_ONNX_PY_INIT_FILE ${CMAKE_CURRENT_BINARY_DIR}/mlx_onnx___init__.py)
+    file(WRITE ${MLX_ONNX_PY_INIT_FILE} "from ._core import *  # noqa: F401,F403\n")
+  endif()
+  if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mlx/CMakeLists.txt) # NOTE(review): add_subdirectory(mlx) ran earlier in bundled mode and would already fail if this were missing - confirm ordering
+    message(FATAL_ERROR "Bundled mlx sources are missing at ${CMAKE_CURRENT_SOURCE_DIR}/mlx")
+  endif()
+  if(NOT TARGET core) # presumably `core` is the Python extension target defined by the bundled mlx build - verify
+    message(FATAL_ERROR "Bundled mlx Python extension target `core` was not built")
+  endif()
+  install(TARGETS core LIBRARY DESTINATION mlx COMPONENT python)
+  if(APPLE AND MLX_BUILD_METAL)
+    # MLX looks for mlx.metallib next to the extension module using MLX runtime.
+    install(
+      FILES ${CMAKE_CURRENT_BINARY_DIR}/mlx/mlx/backend/metal/kernels/mlx.metallib
+      DESTINATION mlx
+      COMPONENT python)
+    # mlx_onnx._core also links MLX and resolves the same metallib at runtime.
+    install(
+      FILES ${CMAKE_CURRENT_BINARY_DIR}/mlx/mlx/backend/metal/kernels/mlx.metallib
+      DESTINATION mlx_onnx
+      COMPONENT python)
+  endif()
+  install( # copy the pure-Python mlx package sources next to the extension module
+    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mlx/python/mlx/
+    DESTINATION mlx
+    COMPONENT python
+    PATTERN "__pycache__" EXCLUDE)
+  install( # RENAME lets the generated fallback file land as __init__.py too
+    FILES ${MLX_ONNX_PY_INIT_FILE}
+    DESTINATION mlx_onnx
+    RENAME __init__.py
+    COMPONENT python)
+  set(MLX_ONNX_VENDOR_MLX_ROOT mlx_onnx/_vendor/mlx) # the bundled MLX source tree is shipped under this prefix
+  install(
+    FILES ${CMAKE_CURRENT_SOURCE_DIR}/mlx/CMakeLists.txt
+          ${CMAKE_CURRENT_SOURCE_DIR}/mlx/mlx.pc.in
+          ${CMAKE_CURRENT_SOURCE_DIR}/mlx/LICENSE
+          ${CMAKE_CURRENT_SOURCE_DIR}/mlx/ACKNOWLEDGMENTS.md
+    DESTINATION ${MLX_ONNX_VENDOR_MLX_ROOT}
+    COMPONENT python)
+  install( # no trailing slash: the cmake/ and mlx/ directories themselves are copied
+    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mlx/cmake
+              ${CMAKE_CURRENT_SOURCE_DIR}/mlx/mlx
+    DESTINATION ${MLX_ONNX_VENDOR_MLX_ROOT}
+    COMPONENT python)
+endif()
+if(MLX_ONNX_INSTALL_CPP_ARTIFACTS) # C++ library and headers go into the `cpp` component
+  include(GNUInstallDirs) # defines the CMAKE_INSTALL_LIBDIR / BINDIR / INCLUDEDIR variables used below
+  install(
+    TARGETS mlx_onnx
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+    COMPONENT cpp)
+  install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} COMPONENT cpp) # trailing slash copies the contents of include/
+endif()

data/submodules/mlx-onnx/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 MLX Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.