PyPI - nexaai - Versions diffs - 1.0.6rc1__cp310-cp310-macosx_14_0_universal2.whl → 1.0.7__cp310-cp310-macosx_14_0_universal2.whl - Mend

nexaai 1.0.6rc1__cp310-cp310-macosx_14_0_universal2.whl → 1.0.7__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nexaai might be problematic. Click here for more details.

Files changed (16) hide show

nexaai/_stub.cpython-310-darwin.so +0 -0
nexaai/_version.py +1 -1
nexaai/binds/libnexa_bridge.dylib +0 -0
nexaai/embedder_impl/mlx_embedder_impl.py +6 -5
nexaai/mlx_backend/embedding/generate.py +219 -16
nexaai/mlx_backend/embedding/interface.py +346 -41
nexaai/mlx_backend/embedding/main.py +126 -35
nexaai/utils/manifest_utils.py +280 -0
nexaai/utils/model_manager.py +64 -73
nexaai/utils/model_types.py +47 -0
nexaai/utils/progress_tracker.py +10 -6
nexaai/utils/quantization_utils.py +239 -0
{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/METADATA +2 -1
{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/RECORD +16 -13
{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/WHEEL +0 -0
{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/top_level.txt +0 -0

nexaai/utils/quantization_utils.py ADDED Viewed

@@ -0,0 +1,239 @@
+"""
+Quantization utilities for extracting quantization types from model files and configurations.
+This module provides utilities to extract quantization information from:
+- GGUF model filenames
+- MLX model repository IDs
+- MLX model config.json files
+"""
+import os
+import json
+import re
+import logging
+from enum import Enum
+from typing import Optional
+# Module-level logger, named after this module via __name__
+logger = logging.getLogger(__name__)
+class QuantizationType(str, Enum):
+    """
+    Enum for GGUF and MLX model quantization types.
+    The class mixes in ``str``, so every member is also a string and
+    compares equal to its plain value (for example ``"Q4_0"``).
+    """
+    # GGUF quantization types; each value is the upper-case form of the
+    # member name.
+    BF16 = "BF16"
+    F16 = "F16"
+    Q2_K = "Q2_K"
+    Q2_K_L = "Q2_K_L"
+    Q3_K_M = "Q3_K_M"
+    Q3_K_S = "Q3_K_S"
+    Q4_0 = "Q4_0"
+    Q4_1 = "Q4_1"
+    Q4_K_M = "Q4_K_M"
+    Q4_K_S = "Q4_K_S"
+    Q5_K_M = "Q5_K_M"
+    Q5_K_S = "Q5_K_S"
+    Q6_K = "Q6_K"
+    Q8_0 = "Q8_0"
+    MXFP4 = "MXFP4"
+    MXFP8 = "MXFP8"
+    # MLX bit-based quantization types; member BIT_<n> has the value "<n>BIT".
+    BIT_1 = "1BIT"
+    BIT_2 = "2BIT"
+    BIT_3 = "3BIT"
+    BIT_4 = "4BIT"
+    BIT_5 = "5BIT"
+    BIT_6 = "6BIT"
+    BIT_7 = "7BIT"
+    BIT_8 = "8BIT"
+    BIT_16 = "16BIT"
+def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
+    """
+    Extract the GGUF quantization type encoded in a model filename.
+    Matching is case-insensitive and substring-based: the filename must
+    contain a known quantization tag immediately followed by "." (for
+    example "model.Q4_K_M.gguf"). Requiring the trailing "." stops a tag
+    from matching when it is only the prefix of a longer, unknown tag
+    (e.g. "q4_0." does not occur in "q4_0_xl.").
+    Args:
+        filename: The filename to extract quantization from
+    Returns:
+        QuantizationType enum value, or None if the filename is empty or
+        contains no known tag
+    """
+    if not filename:
+        return None
+    # Lower-case tag (including the trailing ".") -> enum value
+    pattern_to_enum = {
+        'bf16.': QuantizationType.BF16,
+        'f16.': QuantizationType.F16,
+        'q2_k_l.': QuantizationType.Q2_K_L,
+        'q2_k.': QuantizationType.Q2_K,
+        'q3_k_m.': QuantizationType.Q3_K_M,
+        'q3_k_s.': QuantizationType.Q3_K_S,
+        # Misspelled tag recognised by the previous implementation; kept so
+        # that any filename which matched before still matches.
+        'q3_ks.': QuantizationType.Q3_K_S,
+        'q4_k_m.': QuantizationType.Q4_K_M,
+        'q4_k_s.': QuantizationType.Q4_K_S,
+        'q4_0.': QuantizationType.Q4_0,
+        'q4_1.': QuantizationType.Q4_1,
+        'q5_k_m.': QuantizationType.Q5_K_M,
+        'q5_k_s.': QuantizationType.Q5_K_S,
+        'q6_k.': QuantizationType.Q6_K,
+        'q8_0.': QuantizationType.Q8_0,
+        'mxfp4.': QuantizationType.MXFP4,
+        'mxfp8.': QuantizationType.MXFP8,
+    }
+    filename_lower = filename.lower()
+    # Try the longest tags first so that a short tag never wins over a longer
+    # tag that contains it ("f16." is a substring of "bf16.").
+    for pattern in sorted(pattern_to_enum, key=len, reverse=True):
+        if pattern in filename_lower:
+            return pattern_to_enum[pattern]
+    return None
+def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
+    """
+    Infer the MLX quantization bit width from a repository ID.
+    Two case-insensitive naming conventions are tried in order: "<N>bit"
+    (e.g. "4bit"), then "-q<N>" / "_q<N>" (e.g. "-q8"). Within a
+    convention, the first occurrence whose N is a supported width wins.
+    Args:
+        repo_id: The repository ID to extract quantization from
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    # Supported widths, resolved to the BIT_<N> enum members
+    supported_widths = (1, 2, 3, 4, 5, 6, 7, 8, 16)
+    enum_by_width = {n: QuantizationType[f"BIT_{n}"] for n in supported_widths}
+    lowered_id = repo_id.lower()
+    # Convention 1: "<N>bit"
+    for hit in re.finditer(r'(\d+)bit', lowered_id):
+        try:
+            width = int(hit.group(1))
+        except ValueError:
+            continue
+        if width not in enum_by_width:
+            continue
+        logger.debug(f"Found {width}bit quantization in repo_id: {repo_id}")
+        return enum_by_width[width]
+    # Convention 2: "-q<N>" or "_q<N>"
+    for hit in re.finditer(r'[-_]q(\d+)', lowered_id):
+        try:
+            width = int(hit.group(1))
+        except ValueError:
+            continue
+        if width not in enum_by_width:
+            continue
+        logger.debug(f"Found Q{width} quantization in repo_id: {repo_id}")
+        return enum_by_width[width]
+    return None
+def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
+    """
+    Extract quantization type from the "quantization.bits" field of an MLX
+    model's config.json file.
+    This is best-effort: a missing, unreadable or malformed config is logged
+    and reported as None instead of raising.
+    Args:
+        mlx_folder_path: Path to the MLX model folder
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    config_path = os.path.join(mlx_folder_path, "config.json")
+    if not os.path.exists(config_path):
+        logger.debug(f"Config file not found: {config_path}")
+        return None
+    # Only reading and parsing is guarded, so the handlers below cannot hide
+    # mistakes in the lookup logic that follows.
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            config = json.load(f)
+    except (json.JSONDecodeError, IOError) as e:
+        logger.warning(f"Error reading config.json from {config_path}: {e}")
+        return None
+    except Exception as e:
+        # Deliberate catch-all: detection must never abort the caller.
+        logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")
+        return None
+    # Valid JSON is not necessarily an object (it may be a list, number, ...).
+    if not isinstance(config, dict):
+        logger.debug(f"config.json does not contain a JSON object: {config_path}")
+        return None
+    # Look for quantization.bits field
+    quantization_config = config.get("quantization", {})
+    if not isinstance(quantization_config, dict):
+        return None
+    bits = quantization_config.get("bits")
+    # bool is a subclass of int, so `"bits": true` would otherwise be treated
+    # as 1 and reported as 1-bit quantization.
+    if isinstance(bits, bool) or not isinstance(bits, int):
+        return None
+    # Mapping from bit numbers to enum values
+    bit_to_enum = {
+        1: QuantizationType.BIT_1,
+        2: QuantizationType.BIT_2,
+        3: QuantizationType.BIT_3,
+        4: QuantizationType.BIT_4,
+        5: QuantizationType.BIT_5,
+        6: QuantizationType.BIT_6,
+        7: QuantizationType.BIT_7,
+        8: QuantizationType.BIT_8,
+        16: QuantizationType.BIT_16,
+    }
+    quantization_type = bit_to_enum.get(bits)
+    if quantization_type is None:
+        logger.debug(f"Unsupported quantization bits value: {bits}")
+        return None
+    logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
+    return quantization_type
+def extract_gguf_quantization(filename: str) -> str:
+    """
+    Return the quantization level of a GGUF file as a plain string.
+    String-returning wrapper around extract_quantization_from_filename,
+    provided for backward compatibility with callers that expect a string
+    rather than the enum.
+    Args:
+        filename: The GGUF filename
+    Returns:
+        The matched enum member's value, or "UNKNOWN" if no quantization
+        tag is recognised
+    """
+    detected = extract_quantization_from_filename(filename)
+    return detected.value if detected else "UNKNOWN"
+def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
+    """
+    Detect the quantization of an MLX model, trying each source in priority order.
+    The repository ID is inspected first; config.json inside the model
+    directory is consulted only when the ID yields nothing.
+    Args:
+        repo_id: The repository ID
+        directory_path: Path to the model directory
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    # Wrapped in lambdas so a later source is evaluated only if every earlier
+    # one came back empty.
+    detectors = (
+        lambda: extract_quantization_from_repo_id(repo_id),
+        lambda: extract_quantization_from_mlx_config(directory_path),
+    )
+    for detect in detectors:
+        found = detect()
+        if found:
+            return found
+    return None

{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nexaai
-Version: 1.0.6rc1
+Version: 1.0.7
 Summary: Python bindings for NexaSDK C-lib backend
 Author-email: "Nexa AI, Inc." <dev@nexa.ai>
 Project-URL: Homepage, https://github.com/NexaAI/nexasdk-bridge
@@ -21,6 +21,7 @@ Provides-Extra: mlx
 Requires-Dist: mlx; extra == "mlx"
 Requires-Dist: mlx-lm; extra == "mlx"
 Requires-Dist: mlx-vlm; extra == "mlx"
+Requires-Dist: mlx-embeddings; extra == "mlx"
 Requires-Dist: tokenizers; extra == "mlx"
 Requires-Dist: safetensors; extra == "mlx"
 Requires-Dist: Pillow; extra == "mlx"

{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 nexaai/__init__.py,sha256=jXdC4vv6DBK1fVewYTYSUhOOYfvf_Mk81UIeMGGIKUg,2029
-nexaai/_stub.cpython-310-darwin.so,sha256=HD3LnNPlQm7XugP9lz_ed5o9EIZZwH5_SUfJWjeyJwg,66768
-nexaai/_version.py,sha256=o8WPRe-h5be83JEwTPwBVdsZ20QQ2VFyIwzifvgbiPQ,142
+nexaai/_stub.cpython-310-darwin.so,sha256=f2Z51NtVXCP4Jt6YFxsABdZxVReTE4jWqJSks_DFJtk,66768
+nexaai/_version.py,sha256=HMQ_cuen1UlESzaxkeIlsIDBtPl1Uc9t60FOoMWVLcM,138
 nexaai/asr.py,sha256=NljMXDErwPNMOPaRkJZMEDka9Nk8xyur7L8i924TStY,2054
 nexaai/base.py,sha256=N8PRgDFA-XPku2vWnQIofQ7ipz3pPlO6f8YZGnuhquE,982
 nexaai/common.py,sha256=yBnIbqYaQYnfrl7IczOBh6MDibYZVxwaRJEglYcKgGs,3422
@@ -19,7 +19,7 @@ nexaai/binds/__init__.py,sha256=T9Ua7SzHNglSeEqXlfH5ymYXRyXhNKkC9z_y_bWCNMo,80
 nexaai/binds/common_bind.cpython-310-darwin.so,sha256=hVxY76tn7hN6uHDIgM7LWNvgoudHgNZVoaygM9X1RWE,217232
 nexaai/binds/embedder_bind.cpython-310-darwin.so,sha256=FT8581RNciilskK89PhtnNSjw4Oh0-xk8QdbJVFmOd8,202064
 nexaai/binds/libcrypto.dylib,sha256=aWif9WhTKVQhmZL3DmtIpMkZY5JSb_Ny6CClmUBKYM4,4710416
-nexaai/binds/libnexa_bridge.dylib,sha256=hv4zUyl0ajPO_84svUUssADt0qGeLouyMGeeyqsrWOY,251480
+nexaai/binds/libnexa_bridge.dylib,sha256=9xmdJs9T2eulxIYJJ2axhnXCYeVTTFE_5b3qF9mDsLE,251480
 nexaai/binds/libssl.dylib,sha256=Q2frAdhR729oKYuCjJOEr1Ott3idFWoFp98fwNqtIaU,881616
 nexaai/binds/llm_bind.cpython-310-darwin.so,sha256=3Bsq0_tGkM027-bORVeJUDl6CYZxAF9sbDIn1l31XTQ,182704
 nexaai/binds/nexa_llama_cpp/libggml-base.dylib,sha256=JM4oOkie1su0ES5hMdtILeQHlRukRzH1vTleTupUXhg,650736
@@ -186,7 +186,7 @@ nexaai/cv_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/cv_impl/mlx_cv_impl.py,sha256=gKECQOv8iaWwG3bl7xeqVy2NN_9K7tYerIFzfn4eLo4,3228
 nexaai/cv_impl/pybind_cv_impl.py,sha256=uSmwBste4cT7c8DQmXzRLmzwDf773PAbXNYWW1UzVls,1064
 nexaai/embedder_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/embedder_impl/mlx_embedder_impl.py,sha256=OsDzsc_2wZkSoWu6yCOZadMkaYdBW3uyjF11hDKTaX8,4383
+nexaai/embedder_impl/mlx_embedder_impl.py,sha256=dTjOC1VJ9ypIgCvkK_jKNSWpswbg132rDcTzWcL5oFA,4482
 nexaai/embedder_impl/pybind_embedder_impl.py,sha256=Ga1JYauVkRq6jwAGL7Xx5HDaIx483_v9gZVoTyd3xNU,3495
 nexaai/image_gen_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/image_gen_impl/mlx_image_gen_impl.py,sha256=BuDkksvXyb4J02GsdnbGAmYckfUU0Eah6BimoMD3QqY,11219
@@ -206,9 +206,9 @@ nexaai/mlx_backend/cv/interface.py,sha256=qE51ApUETEZxDMPZB4VdV098fsXcIiEg4Hj9za
 nexaai/mlx_backend/cv/main.py,sha256=hYaF2C36hKTyy7kGMNkzLrdczPiFVS73H320klzzpVM,2856
 nexaai/mlx_backend/cv/modeling/pp_ocr_v4.py,sha256=Vpa-QTy7N5oFfGI7Emldx1dOYJWv_4nAFNRDz_5vHBI,58593
 nexaai/mlx_backend/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nexaai/mlx_backend/embedding/generate.py,sha256=irAbc_nBD9wMqe5z1eFgp6Gf_mONow2I3z3g-DAAbtY,5018
-nexaai/mlx_backend/embedding/interface.py,sha256=hW0yrtD55ol0hB-X5glcXMc4TiyKuT4U5GaI8SP-kAU,11508
-nexaai/mlx_backend/embedding/main.py,sha256=_kIwz69A7UXA_u0VNP6eqM2W-LH_1_1hlJtro6U_FjI,2620
+nexaai/mlx_backend/embedding/generate.py,sha256=leZA0Ir78-5GV3jloPKYSAKgb04Wr5jORFJlSSVyKs0,12855
+nexaai/mlx_backend/embedding/interface.py,sha256=M7AGiq_UVLNIi2Ie6H08ySnMxIjIhUlNgmV9I_rKYt4,22742
+nexaai/mlx_backend/embedding/main.py,sha256=xKRebBcooKuf8DzWKwCicftes3MAcYAd1QvcT9_AAPQ,6003
 nexaai/mlx_backend/embedding/modeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/mlx_backend/embedding/modeling/nexa_jina_v2.py,sha256=F9Z_9r-Dh0wNThiMp5W5hqE2dt5bf4ps5_c6h4BuWGw,15218
 nexaai/mlx_backend/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -511,12 +511,15 @@ nexaai/tts_impl/mlx_tts_impl.py,sha256=i_uNPdvlXYtL3e01oKjDlP9jgkWCRt1bBHsExaaiJ
 nexaai/tts_impl/pybind_tts_impl.py,sha256=mpn44r6pfYLIl-NrEy2dXHjGtWtNCmM7HRyxiANxUI4,1444
 nexaai/utils/avatar_fetcher.py,sha256=bWy8ujgbOiTHFCjFxTwkn3uXbZ84PgEGUkXkR3MH4bI,3821
 nexaai/utils/decode.py,sha256=61n4Zf6c5QLyqGoctEitlI9BX3tPlP2a5aaKNHbw3T4,404
-nexaai/utils/model_manager.py,sha256=c07ocxxw1IHCQw6esbmYK0dX2R2OajfEIGsC_2teHXo,48572
-nexaai/utils/progress_tracker.py,sha256=76HlPkyN41IMHSsH56-qdlN_aY_oBfJz50J16Cx67R0,15102
+nexaai/utils/manifest_utils.py,sha256=2waOuQErodNHhoAETQqlQgXdVes-T5A4HMb8pUIN9hg,9765
+nexaai/utils/model_manager.py,sha256=xzerYqXkvRrHEqpEQvhOeg_6XQho2BvYw6ee4dlz69A,48575
+nexaai/utils/model_types.py,sha256=-DER8L4lAUR_iLS99F0r57avwqWtuN21ug5pX2p24_E,1369
+nexaai/utils/progress_tracker.py,sha256=mTw7kaKH8BkmecYm7iBMqRHd9uUH4Ch0S8CzbpARDCk,15404
+nexaai/utils/quantization_utils.py,sha256=4gvp6UQfSO9G1FYBwnFtQspTzH9sDbi1PBXw2t1N69M,7650
 nexaai/vlm_impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nexaai/vlm_impl/mlx_vlm_impl.py,sha256=od1R1mRoIgPG3NHC7JiDlcB_YJY8aklX8Em3ZkeHNpE,10734
 nexaai/vlm_impl/pybind_vlm_impl.py,sha256=5ZMFgDATthmMzjrd-vE5KX5ZAMoWPYbF_FTLz8DBKIk,8908
-nexaai-1.0.6rc1.dist-info/METADATA,sha256=U2gJx8JlzG3wUYtVYk7VdDN7ildkHxWTQUE5Oya_Z_s,1154
-nexaai-1.0.6rc1.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
-nexaai-1.0.6rc1.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
-nexaai-1.0.6rc1.dist-info/RECORD,,
+nexaai-1.0.7.dist-info/METADATA,sha256=DMyi7lxZHVYv62pJQ6SemiNzIqHSGuS4-r5vHO9llJw,1197
+nexaai-1.0.7.dist-info/WHEEL,sha256=T2p57lol9__xkoU6aJTyN1Pm43ZpRU3q6km7mIbrAMs,114
+nexaai-1.0.7.dist-info/top_level.txt,sha256=LRE2YERlrZk2vfuygnSzsEeqSknnZbz3Z1MHyNmBU4w,7
+nexaai-1.0.7.dist-info/RECORD,,

{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{nexaai-1.0.6rc1.dist-info → nexaai-1.0.7.dist-info}/top_level.txt RENAMED Viewed

File without changes