nexaai-1.0.19rc6-cp310-cp310-macosx_14_0_universal2.whl → nexaai-1.0.19rc8-cp310-cp310-macosx_14_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexaai might be problematic.
- nexaai/_stub.cpython-310-darwin.so +0 -0
- nexaai/_version.py +1 -1
- nexaai/binds/libnexa_bridge.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-base.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-cpu.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml-metal.so +0 -0
- nexaai/binds/nexa_llama_cpp/libggml.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libllama.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libmtmd.dylib +0 -0
- nexaai/binds/nexa_llama_cpp/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_mlx/libnexa_plugin.dylib +0 -0
- nexaai/binds/nexa_nexaml/libggml-base.dylib +0 -0
- nexaai/binds/nexa_nexaml/libggml-cpu.so +0 -0
- nexaai/binds/nexa_nexaml/libggml-metal.so +0 -0
- nexaai/binds/nexa_nexaml/libggml.dylib +0 -0
- nexaai/mlx_backend/vlm/generate_qwen3_vl_moe.py +276 -0
- nexaai/mlx_backend/vlm/interface.py +21 -4
- nexaai/mlx_backend/vlm/main.py +6 -2
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/__init__.py +0 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/base.py +117 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/cache.py +531 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/generate.py +701 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/rope_utils.py +255 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/sample_utils.py +303 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/llm_common/tokenizer_utils.py +407 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/processor.py +476 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/qwen3vl_moe.py +1309 -0
- nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py +210 -0
- nexaai/utils/manifest_utils.py +222 -15
- nexaai/utils/model_manager.py +83 -7
- nexaai/utils/model_types.py +2 -0
- {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc8.dist-info}/METADATA +1 -1
- {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc8.dist-info}/RECORD +35 -24
- {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc8.dist-info}/WHEEL +0 -0
- {nexaai-1.0.19rc6.dist-info → nexaai-1.0.19rc8.dist-info}/top_level.txt +0 -0
nexaai/mlx_backend/vlm/modeling/models/qwen3vl_moe/switch_layers.py
ADDED
@@ -0,0 +1,210 @@
+import math
+import mlx.core as mx
+import mlx.nn as nn
+
+def _gather_sort(x, indices):
+    *_, M = indices.shape
+    indices = indices.flatten()
+    order = mx.argsort(indices)
+    inv_order = mx.argsort(order)
+    return x.flatten(0, -3)[order // M], indices[order], inv_order
+
+
+def _scatter_unsort(x, inv_order, shape=None):
+    x = x[inv_order]
+    if shape is not None:
+        x = mx.unflatten(x, 0, shape)
+    return x
+
+
+class QuantizedSwitchLinear(nn.Module):
+    def __init__(
+        self,
+        input_dims: int,
+        output_dims: int,
+        num_experts: int,
+        bias: bool = True,
+        group_size: int = 64,
+        bits: int = 4,
+    ):
+        super().__init__()
+
+        scale = math.sqrt(1 / input_dims)
+        self.weight, self.scales, self.biases = mx.quantize(
+            mx.random.uniform(
+                low=-scale,
+                high=scale,
+                shape=(num_experts, output_dims, input_dims),
+            ),
+            group_size=group_size,
+            bits=bits,
+        )
+
+        if bias:
+            self.bias = mx.zeros((num_experts, output_dims))
+
+        self.group_size = group_size
+        self.bits = bits
+
+        # Freeze this model's parameters
+        self.freeze()
+
+    def unfreeze(self, *args, **kwargs):
+        """Wrap unfreeze so that we unfreeze any layers we might contain but
+        our parameters will remain frozen."""
+        super().unfreeze(*args, **kwargs)
+        self.freeze(recurse=False)
+
+    @property
+    def input_dims(self):
+        return self.scales.shape[2] * self.group_size
+
+    @property
+    def output_dims(self):
+        return self.weight.shape[1]
+
+    @property
+    def num_experts(self):
+        return self.weight.shape[0]
+
+    def __call__(self, x, indices, sorted_indices=False):
+        x = mx.gather_qmm(
+            x,
+            self["weight"],
+            self["scales"],
+            self["biases"],
+            rhs_indices=indices,
+            transpose=True,
+            group_size=self.group_size,
+            bits=self.bits,
+            sorted_indices=sorted_indices,
+        )
+        if "bias" in self:
+            x = x + mx.expand_dims(self["bias"][indices], -2)
+        return x
+
+
+class SwitchLinear(nn.Module):
+    def __init__(
+        self, input_dims: int, output_dims: int, num_experts: int, bias: bool = True
+    ):
+        super().__init__()
+        scale = math.sqrt(1 / input_dims)
+        self.weight = mx.random.uniform(
+            low=-scale,
+            high=scale,
+            shape=(num_experts, output_dims, input_dims),
+        )
+
+        if bias:
+            self.bias = mx.zeros((num_experts, output_dims))
+
+    @property
+    def input_dims(self):
+        return self.weight.shape[2]
+
+    @property
+    def output_dims(self):
+        return self.weight.shape[1]
+
+    @property
+    def num_experts(self):
+        return self.weight.shape[0]
+
+    def __call__(self, x, indices, sorted_indices=False):
+        x = mx.gather_mm(
+            x,
+            self["weight"].swapaxes(-1, -2),
+            lhs_indices=None,
+            rhs_indices=indices,
+        )
+        if "bias" in self:
+            x = x + mx.expand_dims(self["bias"][indices], -2)
+        return x
+
+    def to_quantized(self, group_size: int = 64, bits: int = 4):
+        num_experts, output_dims, input_dims = self.weight.shape
+        ql = QuantizedSwitchLinear(
+            input_dims, output_dims, num_experts, False, group_size, bits
+        )
+        ql.weight, ql.scales, ql.biases = mx.quantize(self.weight, group_size, bits)
+        if "bias" in self:
+            ql.bias = self.bias
+        return ql
+
+
+class SwitchGLU(nn.Module):
+    def __init__(
+        self,
+        input_dims: int,
+        hidden_dims: int,
+        num_experts: int,
+        activation=nn.SiLU(),
+        bias: bool = False,
+    ):
+        super().__init__()
+
+        self.gate_proj = SwitchLinear(input_dims, hidden_dims, num_experts, bias=bias)
+        self.up_proj = SwitchLinear(input_dims, hidden_dims, num_experts, bias=bias)
+        self.down_proj = SwitchLinear(hidden_dims, input_dims, num_experts, bias=bias)
+        self.activation = activation
+
+    def __call__(self, x, indices) -> mx.array:
+        x = mx.expand_dims(x, (-2, -3))
+
+        # When we have many tokens, then sort them to make sure that the access
+        # of different experts is in order.
+        do_sort = indices.size >= 64
+        idx = indices
+        inv_order = None
+        if do_sort:
+            x, idx, inv_order = _gather_sort(x, indices)
+
+        x_up = self.up_proj(x, idx, sorted_indices=do_sort)
+        x_gate = self.gate_proj(x, idx, sorted_indices=do_sort)
+        x = self.down_proj(
+            self.activation(x_gate) * x_up,
+            idx,
+            sorted_indices=do_sort,
+        )
+
+        if do_sort:
+            x = _scatter_unsort(x, inv_order, indices.shape)
+
+        return x.squeeze(-2)
+
+
+class SwitchMLP(nn.Module):
+    def __init__(
+        self,
+        input_dims: int,
+        hidden_dims: int,
+        num_experts: int,
+        activation=nn.GELU(approx="precise"),
+        bias: bool = False,
+    ):
+        super().__init__()
+
+        self.fc1 = SwitchLinear(input_dims, hidden_dims, num_experts, bias=bias)
+        self.fc2 = SwitchLinear(hidden_dims, input_dims, num_experts, bias=bias)
+        self.activation = activation
+
+    def __call__(self, x, indices) -> mx.array:
+        x = mx.expand_dims(x, (-2, -3))
+
+        # When we have many tokens, then sort them to make sure that the access
+        # of different experts is in order.
+        do_sort = indices.size >= 64
+        idx = indices
+        inv_order = None
+        if do_sort:
+            x, idx, inv_order = _gather_sort(x, indices)
+
+        x = self.fc1(x, idx, sorted_indices=do_sort)
+        x = self.activation(x)
+        x = self.fc2(x, idx, sorted_indices=do_sort)
+
+        if do_sort:
+            x = _scatter_unsort(x, inv_order, indices.shape)
+
+        return x.squeeze(-2)
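The new switch_layers.py above supplies the routed-expert primitives for the Qwen3-VL-MoE port: _gather_sort and _scatter_unsort reorder tokens so consecutive rows hit the same expert, and SwitchGLU / SwitchMLP run the gathered expert matmuls. As a minimal usage sketch (not code from the package), a router would pick top-k experts per token and feed the indices in; the router head, shapes, and top_k value below are illustrative assumptions:

# Usage sketch for SwitchGLU; the router head, shapes, and top_k are assumptions.
import mlx.core as mx
import mlx.nn as nn
from nexaai.mlx_backend.vlm.modeling.models.qwen3vl_moe.switch_layers import SwitchGLU

num_experts, top_k, dim, hidden = 8, 2, 64, 128
glu = SwitchGLU(dim, hidden, num_experts)
router = nn.Linear(dim, num_experts, bias=False)  # hypothetical router head

x = mx.random.normal((1, 10, dim))            # (batch, tokens, dim)
logits = router(x)                            # (batch, tokens, num_experts)
# Top-k expert indices per token, plus normalized routing weights
indices = mx.argpartition(-logits, kth=top_k - 1, axis=-1)[..., :top_k]
scores = mx.softmax(mx.take_along_axis(logits, indices, axis=-1), axis=-1)

y = glu(x, indices)                           # (batch, tokens, top_k, dim)
out = (y * scores[..., None]).sum(axis=-2)    # (batch, tokens, dim)

The do_sort threshold in SwitchGLU.__call__ (indices.size >= 64) means a small decode-time batch like this one skips the argsort round trip, while prefill-sized batches get expert-contiguous access in the gathered matmuls.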
nexaai/utils/manifest_utils.py
CHANGED
@@ -22,6 +22,11 @@ from .model_types import (
     MODEL_TYPE_TO_PIPELINE
 )
 
+MODEL_FILE_TYPE_TO_PLUGIN_ID_MAPPING = {
+    'npu': 'npu',
+    'mlx': 'mlx',
+    'gguf': 'llama_cpp'
+}
 
 def process_manifest_metadata(manifest: Dict[str, Any], repo_id: str) -> Dict[str, Any]:
     """Process manifest metadata to handle null/missing fields."""
@@ -94,12 +99,20 @@ def save_download_metadata(directory_path: str, metadata: Dict[str, Any]) -> Non
     pass
 
 
-def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> Dict[str, Any]:
+def _get_plugin_id_from_model_file_type(model_file_type: Optional[str], default: str = "llama_cpp") -> str:
+    """Map model file type to PluginId."""
+    return MODEL_FILE_TYPE_TO_PLUGIN_ID_MAPPING.get(model_file_type, default)
+
+
+def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> Dict[str, Any]:
     """Create GGUF format manifest."""
 
     # Load existing manifest to merge GGUF files if it exists
     existing_manifest = load_nexa_manifest(directory_path)
 
+    # Check if there's a downloaded nexa.manifest from the repo
+    downloaded_manifest = old_metadata.get('downloaded_manifest', {})
+
     model_files = {}
     if existing_manifest and "ModelFile" in existing_manifest:
         model_files = existing_manifest["ModelFile"].copy()
@@ -151,10 +164,41 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, ol
             "Size": file_size
         }
 
+    # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
+    plugin_id = kwargs.get('plugin_id')
+    if not plugin_id:
+        model_file_type = old_metadata.get('model_file_type')
+        if downloaded_manifest.get('PluginId'):
+            plugin_id = downloaded_manifest.get('PluginId')
+        elif model_file_type:
+            plugin_id = _get_plugin_id_from_model_file_type(model_file_type)
+        else:
+            plugin_id = "llama_cpp"
+
+    # Determine ModelType with priority: kwargs > downloaded_manifest > pipeline_tag mapping
+    model_type = kwargs.get('model_type')
+    if not model_type:
+        if downloaded_manifest.get('ModelType'):
+            model_type = downloaded_manifest.get('ModelType')
+        else:
+            model_type = PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other")
+
+    # Determine ModelName with priority: kwargs > downloaded_manifest > empty string
+    model_name = kwargs.get('model_name')
+    if not model_name:
+        model_name = downloaded_manifest.get('ModelName', '')
+
+    # Get DeviceId and MinSDKVersion from kwargs or default to empty string
+    device_id = kwargs.get('device_id', '')
+    min_sdk_version = kwargs.get('min_sdk_version', '')
+
     manifest = {
         "Name": repo_id,
-        "
-        "
+        "ModelName": model_name,
+        "ModelType": model_type,
+        "PluginId": plugin_id,
+        "DeviceId": device_id,
+        "MinSDKVersion": min_sdk_version,
         "ModelFile": model_files,
         "MMProjFile": mmproj_file,
         "TokenizerFile": {
@@ -172,12 +216,15 @@ def create_gguf_manifest(repo_id: str, files: List[str], directory_path: str, old
     return manifest
 
 
-def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> Dict[str, Any]:
+def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> Dict[str, Any]:
     """Create MLX format manifest."""
 
     # Load existing manifest to merge MLX files if it exists
     existing_manifest = load_nexa_manifest(directory_path)
 
+    # Check if there's a downloaded nexa.manifest from the repo
+    downloaded_manifest = old_metadata.get('downloaded_manifest', {})
+
     model_files = {}
     extra_files = []
 
@@ -233,10 +280,153 @@ def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old
                 "Size": file_size
             })
 
+    # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
+    plugin_id = kwargs.get('plugin_id')
+    if not plugin_id:
+        model_file_type = old_metadata.get('model_file_type')
+        if downloaded_manifest.get('PluginId'):
+            plugin_id = downloaded_manifest.get('PluginId')
+        elif model_file_type:
+            plugin_id = _get_plugin_id_from_model_file_type(model_file_type)
+        else:
+            plugin_id = "mlx"
+
+    # Determine ModelType with priority: kwargs > downloaded_manifest > pipeline_tag mapping
+    model_type = kwargs.get('model_type')
+    if not model_type:
+        if downloaded_manifest.get('ModelType'):
+            model_type = downloaded_manifest.get('ModelType')
+        else:
+            model_type = PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other")
+
+    # Determine ModelName with priority: kwargs > downloaded_manifest > empty string
+    model_name = kwargs.get('model_name')
+    if not model_name:
+        model_name = downloaded_manifest.get('ModelName', '')
+
+    # Get DeviceId and MinSDKVersion from kwargs or default to empty string
+    device_id = kwargs.get('device_id', '')
+    min_sdk_version = kwargs.get('min_sdk_version', '')
+
+    manifest = {
+        "Name": repo_id,
+        "ModelName": model_name,
+        "ModelType": model_type,
+        "PluginId": plugin_id,
+        "DeviceId": device_id,
+        "MinSDKVersion": min_sdk_version,
+        "ModelFile": model_files,
+        "MMProjFile": mmproj_file,
+        "TokenizerFile": {
+            "Name": "",
+            "Downloaded": False,
+            "Size": 0
+        },
+        "ExtraFiles": extra_files if extra_files else None,
+        # Preserve old metadata fields
+        "pipeline_tag": old_metadata.get('pipeline_tag') if old_metadata.get('pipeline_tag') else existing_manifest.get('pipeline_tag'),
+        "download_time": old_metadata.get('download_time') if old_metadata.get('download_time') else existing_manifest.get('download_time'),
+        "avatar_url": old_metadata.get('avatar_url') if old_metadata.get('avatar_url') else existing_manifest.get('avatar_url')
+    }
+
+    return manifest
+
+
+def create_npu_manifest(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> Dict[str, Any]:
+    """Create NPU format manifest."""
+
+    # Load existing manifest to merge NPU files if it exists
+    existing_manifest = load_nexa_manifest(directory_path)
+
+    # Check if there's a downloaded nexa.manifest from the repo
+    downloaded_manifest = old_metadata.get('downloaded_manifest', {})
+
+    model_files = {}
+    extra_files = []
+
+    # Initialize MMProjFile
+    mmproj_file = {
+        "Name": "",
+        "Downloaded": False,
+        "Size": 0
+    }
+
+    for current_file_name in files:
+        file_path = os.path.join(directory_path, current_file_name)
+        file_size = 0
+        if os.path.exists(file_path):
+            try:
+                file_size = os.path.getsize(file_path)
+            except (OSError, IOError):
+                pass
+
+        # Check if this file is an mmproj file
+        is_current_mmproj = 'mmproj' in current_file_name.lower()
+
+        # If we're downloading specific files and this is marked as mmproj, respect that
+        if is_mmproj and file_name is not None:
+            filenames_to_check = file_name if isinstance(file_name, list) else [file_name]
+            is_current_mmproj = current_file_name in filenames_to_check
+
+        if is_current_mmproj:
+            # This is an mmproj file, put it in MMProjFile
+            mmproj_file = {
+                "Name": current_file_name,
+                "Downloaded": True,
+                "Size": file_size
+            }
+        else:
+            # For NPU, all non-mmproj files go to extra_files
+            extra_files.append({
+                "Name": current_file_name,
+                "Downloaded": True,
+                "Size": file_size
+            })
+
+    # Pick the first file from extra_files and add it to ModelFile with key "N/A"
+    if extra_files:
+        first_file = extra_files[0]
+        model_files["N/A"] = {
+            "Name": first_file["Name"],
+            "Downloaded": first_file["Downloaded"],
+            "Size": first_file["Size"]
+        }
+
+    # Determine PluginId with priority: kwargs > downloaded_manifest > model_file_type > default
+    plugin_id = kwargs.get('plugin_id')
+    if not plugin_id:
+        model_file_type = old_metadata.get('model_file_type')
+        if downloaded_manifest.get('PluginId'):
+            plugin_id = downloaded_manifest.get('PluginId')
+        elif model_file_type:
+            plugin_id = _get_plugin_id_from_model_file_type(model_file_type)
+        else:
+            plugin_id = "npu"
+
+    # Determine ModelType with priority: kwargs > downloaded_manifest > pipeline_tag mapping
+    model_type = kwargs.get('model_type')
+    if not model_type:
+        if downloaded_manifest.get('ModelType'):
+            model_type = downloaded_manifest.get('ModelType')
+        else:
+            model_type = PIPELINE_TO_MODEL_TYPE.get(old_metadata.get('pipeline_tag'), "other")
+
+    # Determine ModelName with priority: kwargs > downloaded_manifest > empty string
+    model_name = kwargs.get('model_name')
+    if not model_name:
+        model_name = downloaded_manifest.get('ModelName', '')
+
+    # Get DeviceId and MinSDKVersion from kwargs or default to empty string
+    device_id = kwargs.get('device_id', '')
+    min_sdk_version = kwargs.get('min_sdk_version', '')
+
     manifest = {
         "Name": repo_id,
-        "
-        "
+        "ModelName": model_name,
+        "ModelType": model_type,
+        "PluginId": plugin_id,
+        "DeviceId": device_id,
+        "MinSDKVersion": min_sdk_version,
         "ModelFile": model_files,
         "MMProjFile": mmproj_file,
         "TokenizerFile": {
@@ -254,8 +444,21 @@ def create_mlx_manifest(repo_id: str, files: List[str], directory_path: str, old
     return manifest
 
 
-def detect_model_type(files: List[str]) -> str:
-    """Detect if this is a GGUF or
+def detect_model_type(files: List[str], old_metadata: Dict[str, Any] = None) -> str:
+    """Detect if this is a GGUF, MLX, or NPU model based on file extensions and metadata.
+
+    Args:
+        files: List of files in the model directory
+        old_metadata: Metadata dict that may contain 'model_file_type'
+
+    Returns:
+        Model type string: 'gguf', 'mlx', or 'npu'
+    """
+    # Check if model_file_type is explicitly set to NPU
+    if old_metadata and old_metadata.get('model_file_type') == 'npu':
+        return "npu"
+
+    # Otherwise, detect based on file extensions
     has_gguf = any(f.endswith('.gguf') for f in files)
     has_safetensors = any(f.endswith('.safetensors') or 'safetensors' in f for f in files)
 
@@ -268,7 +471,7 @@ def detect_model_type(files: List[str]) -> str:
     return "mlx"
 
 
-def create_manifest_from_files(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> Dict[str, Any]:
+def create_manifest_from_files(repo_id: str, files: List[str], directory_path: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> Dict[str, Any]:
     """
     Create appropriate manifest format based on detected model type.
 
@@ -276,22 +479,25 @@ def create_manifest_from_files(repo_id: str, files: List[str], directory_path: s
        repo_id: Repository ID
        files: List of files in the model directory
        directory_path: Path to the model directory
-       old_metadata: Existing metadata (pipeline_tag, download_time, avatar_url)
+       old_metadata: Existing metadata (pipeline_tag, download_time, avatar_url, model_file_type)
        is_mmproj: Whether the downloaded file is an mmproj file
        file_name: The specific file(s) that were downloaded (None if entire repo was downloaded)
+       **kwargs: Additional metadata including plugin_id, model_name, model_type, device_id, min_sdk_version
 
     Returns:
        Dict containing the appropriate manifest format
     """
-    model_type = detect_model_type(files)
+    model_type = detect_model_type(files, old_metadata)
 
     if model_type == "gguf":
-        return create_gguf_manifest(repo_id, files, directory_path, old_metadata, is_mmproj, file_name)
+        return create_gguf_manifest(repo_id, files, directory_path, old_metadata, is_mmproj, file_name, **kwargs)
+    elif model_type == "npu":
+        return create_npu_manifest(repo_id, files, directory_path, old_metadata, is_mmproj, file_name, **kwargs)
     else:  # mlx or other
-        return create_mlx_manifest(repo_id, files, directory_path, old_metadata, is_mmproj, file_name)
+        return create_mlx_manifest(repo_id, files, directory_path, old_metadata, is_mmproj, file_name, **kwargs)
 
 
-def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None) -> None:
+def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata: Dict[str, Any], is_mmproj: bool = False, file_name: Optional[Union[str, List[str]]] = None, **kwargs) -> None:
    """
    Create and save manifest based on files found in the directory.
 
@@ -301,6 +507,7 @@ def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata
        old_metadata: Existing metadata to preserve
        is_mmproj: Whether the downloaded file is an mmproj file
        file_name: The specific file(s) that were downloaded (None if entire repo was downloaded)
+       **kwargs: Additional metadata including plugin_id, model_name, model_type, device_id, min_sdk_version
    """
    # Get list of files in the directory
    files = []
@@ -314,7 +521,7 @@ def save_manifest_with_files_metadata(repo_id: str, local_dir: str, old_metadata
            pass
 
    # Create appropriate manifest
-    manifest = create_manifest_from_files(repo_id, files, local_dir, old_metadata, is_mmproj, file_name)
+    manifest = create_manifest_from_files(repo_id, files, local_dir, old_metadata, is_mmproj, file_name, **kwargs)
 
    # Save manifest
    save_download_metadata(local_dir, manifest)