PyPI - nexaai - Versions diffs - 1.0.29__cp310-cp310-macosx_14_0_universal2.whl - Mend

nexaai 1.0.29__cp310-cp310-macosx_14_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (580) hide show

nexaai/utils/model_types.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+Model type mappings for HuggingFace pipeline tags to our internal model types.
+This module provides centralized model type mapping functionality to avoid
+circular imports between other utility modules.
+"""
+from enum import Enum
+from typing import Dict, Optional
+class ModelTypeMapping(Enum):
+    """Enum for mapping HuggingFace pipeline_tag to our ModelType.
+    Every member's value is a ``(pipeline_tag, model_type)`` tuple. The tuple
+    is unpacked into ``__init__``, so each member also exposes the two halves
+    as the ``pipeline_tag`` and ``model_type`` attributes.
+    """
+    # (HuggingFace pipeline tag, internal model type)
+    TEXT_GENERATION = ("text-generation", "llm")
+    IMAGE_TEXT_TO_TEXT = ("image-text-to-text", "vlm")
+    ANY_TO_ANY = ("any-to-any", "ata")
+    AUTOMATIC_SPEECH_RECOGNITION = ("automatic-speech-recognition", "asr")
+    def __init__(self, pipeline_tag: str, model_type: str):
+        # Store both halves of the member's tuple value under readable names.
+        self.pipeline_tag = pipeline_tag
+        self.model_type = model_type
+# Create mapping dictionaries from the enum
+PIPELINE_TO_MODEL_TYPE: Dict[str, str] = {
+    mapping.pipeline_tag: mapping.model_type
+    for mapping in ModelTypeMapping
+}
+MODEL_TYPE_TO_PIPELINE: Dict[str, str] = {
+    mapping.model_type: mapping.pipeline_tag
+    for mapping in ModelTypeMapping
+}
+def map_pipeline_tag_to_model_type(pipeline_tag: str) -> str:
+    """Map HuggingFace pipeline_tag to our ModelType."""
+    if not pipeline_tag:
+        return "other"
+    return PIPELINE_TO_MODEL_TYPE.get(pipeline_tag, "other")
+def map_model_type_to_pipeline_tag(model_type: str) -> str:
+    """Reverse map ModelType back to HuggingFace pipeline_tag."""
+    if not model_type:
+        return None
+    return MODEL_TYPE_TO_PIPELINE.get(model_type)

nexaai/utils/progress_tracker.py ADDED Viewed

@@ -0,0 +1,389 @@
+"""
+Progress tracking utilities for downloads with tqdm integration.
+This module provides custom progress tracking classes that can monitor
+download progress with callback support and customizable display options.
+"""
+import os
+import sys
+import time
+from typing import Optional, Callable, Dict, Any
+from tqdm.auto import tqdm
+class CustomProgressTqdm(tqdm):
+    """Custom tqdm that tracks progress but completely hides terminal output."""
+    def __init__(self, *args, **kwargs):
+        # Filter out 'name' argument which might be passed by newer huggingface_hub versions
+        # but isn't supported by tqdm
+        kwargs.pop('name', None)
+        # Redirect output to devnull to completely suppress terminal output
+        kwargs['file'] = open(os.devnull, 'w')
+        kwargs['disable'] = False  # Keep enabled for tracking
+        kwargs['leave'] = False  # Don't leave progress bar
+        super().__init__(*args, **kwargs)
+    def display(self, msg=None, pos=None):
+        # Override display to show nothing
+        pass
+    def write(self, s, file=None, end="\n", nolock=False):
+        # Override write to prevent any output
+        pass
+    def close(self):
+        # Override close to avoid printing and properly close devnull
+        if hasattr(self, 'fp') and self.fp and self.fp != sys.stdout and self.fp != sys.stderr:
+            try:
+                self.fp.close()
+            except:
+                pass
+        self.disable = True
+        super(tqdm, self).close()
+class DownloadProgressTracker:
+    """Progress tracker for HuggingFace downloads with callback support."""
+    def __init__(self, progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None, show_progress: bool = True):
+        self.progress_data: Dict[str, Dict[str, Any]] = {}
+        self.total_repo_size = 0
+        self.repo_file_count = 0
+        self.original_tqdm_update = None
+        self.original_tqdm_init = None
+        self.original_tqdm_display = None
+        self.original_tqdm_write = None
+        self.is_tracking = False
+        # Callback function
+        self.progress_callback = progress_callback
+        # Progress display
+        self.show_progress = show_progress
+        self.last_display_length = 0
+        # Speed tracking
+        self.last_downloaded = None  # Use None to indicate no previous measurement
+        self.last_time = None  # Use None to indicate no previous time measurement
+        self.speed_history = []
+        self.max_speed_history = 10
+        # Download status
+        self.download_status = "idle"  # idle, downloading, completed, error
+        self.error_message = None
+        self.download_start_time = None
+    def set_repo_info(self, total_size: int, file_count: int):
+        """Set the total repository size and file count before download."""
+        self.total_repo_size = total_size
+        self.repo_file_count = file_count
+    def register_tqdm(self, tqdm_instance):
+        """Register a tqdm instance for monitoring."""
+        tqdm_id = str(id(tqdm_instance))
+        self.progress_data[tqdm_id] = {
+            'current': 0,
+            'total': getattr(tqdm_instance, 'total', 0) or 0,
+            'desc': getattr(tqdm_instance, 'desc', 'Unknown'),
+            'tqdm_obj': tqdm_instance
+        }
+        # Trigger callback when new file is registered
+        self._trigger_callback()
+    def update_progress(self, tqdm_instance, n=1):
+        """Update progress for a tqdm instance."""
+        tqdm_id = str(id(tqdm_instance))
+        if tqdm_id in self.progress_data:
+            self.progress_data[tqdm_id]['current'] = getattr(tqdm_instance, 'n', 0)
+            self.progress_data[tqdm_id]['total'] = getattr(tqdm_instance, 'total', 0) or 0
+            # Trigger callback on every progress update
+            self._trigger_callback()
+    def calculate_speed(self, current_downloaded: int) -> float:
+        """Calculate download speed in bytes per second."""
+        current_time = time.time()
+        # Check if we have a previous measurement to compare against
+        if self.last_time is not None and self.last_downloaded is not None:
+            time_diff = current_time - self.last_time
+            # Only calculate if we have a meaningful time difference (avoid division by very small numbers)
+            if time_diff > 0.1:  # At least 100ms between measurements
+                bytes_diff = current_downloaded - self.last_downloaded
+                # Only calculate speed if bytes actually changed
+                if bytes_diff >= 0:  # Allow 0 for periods with no progress
+                    speed = bytes_diff / time_diff
+                    # Add to speed history for smoothing
+                    self.speed_history.append(speed)
+                    if len(self.speed_history) > self.max_speed_history:
+                        self.speed_history.pop(0)
+                    # Update tracking variables when we actually calculate speed
+                    self.last_downloaded = current_downloaded
+                    self.last_time = current_time
+        else:
+            # First measurement - initialize tracking variables
+            self.last_downloaded = current_downloaded
+            self.last_time = current_time
+        # Return the average of historical speeds if we have any
+        # This ensures we show the last known speed even when skipping updates
+        if self.speed_history:
+            return sum(self.speed_history) / len(self.speed_history)
+        return 0.0
+    def format_bytes(self, bytes_value: int) -> str:
+        """Format bytes to human readable string."""
+        for unit in ['B', 'KB', 'MB', 'GB']:
+            if bytes_value < 1024.0:
+                return f"{bytes_value:.1f} {unit}"
+            bytes_value /= 1024.0
+        return f"{bytes_value:.1f} TB"
+    def format_speed(self, speed: float) -> str:
+        """Format speed to human readable string."""
+        if speed == 0:
+            return "0 B/s"
+        for unit in ['B/s', 'KB/s', 'MB/s', 'GB/s']:
+            if speed < 1024.0:
+                return f"{speed:.1f} {unit}"
+            speed /= 1024.0
+        return f"{speed:.1f} TB/s"
+    def get_progress_data(self) -> Dict[str, Any]:
+        """Get current progress data."""
+        total_downloaded = 0
+        active_file_count = 0
+        total_file_sizes = 0
+        for data in self.progress_data.values():
+            if data['total'] > 0:
+                total_downloaded += data['current']
+                total_file_sizes += data['total']
+                active_file_count += 1
+        # Calculate speed (tracking variables are updated internally)
+        speed = self.calculate_speed(total_downloaded)
+        # Determine total size - prioritize pre-fetched repo size, then aggregate file sizes
+        if self.total_repo_size > 0:
+            # Use pre-fetched repository info if available
+            total_size = self.total_repo_size
+        elif total_file_sizes > 0:
+            # Use sum of individual file sizes if available
+            total_size = total_file_sizes
+        else:
+            # Last resort - we don't know the total size yet
+            total_size = 0
+        file_count = self.repo_file_count if self.repo_file_count > 0 else active_file_count
+        # Calculate percentage - handle unknown total size gracefully
+        if total_size > 0:
+            percentage = min((total_downloaded / total_size * 100), 100.0)
+        else:
+            percentage = 0
+        # Calculate ETA
+        eta_seconds = None
+        if speed > 0 and total_size > total_downloaded:
+            eta_seconds = (total_size - total_downloaded) / speed
+        # Calculate elapsed time
+        elapsed_seconds = None
+        if self.download_start_time:
+            elapsed_seconds = time.time() - self.download_start_time
+        return {
+            'status': self.download_status,
+            'error_message': self.error_message,
+            'progress': {
+                'total_downloaded': total_downloaded,
+                'total_size': total_size,
+                'percentage': round(percentage, 2),
+                'files_active': active_file_count,
+                'files_total': file_count,
+                'known_total': total_size > 0
+            },
+            'speed': {
+                'bytes_per_second': speed,
+                'formatted': self.format_speed(speed)
+            },
+            'formatting': {
+                'downloaded': self.format_bytes(total_downloaded),
+                'total_size': self.format_bytes(total_size)
+            },
+            'timing': {
+                'elapsed_seconds': elapsed_seconds,
+                'eta_seconds': eta_seconds,
+                'start_time': self.download_start_time
+            }
+        }
+    def _display_progress_bar(self, progress_data: Dict[str, Any]):
+        """Display a custom unified progress bar."""
+        if not self.show_progress:
+            return
+        # Clear previous line
+        if self.last_display_length > 0:
+            print('\r' + ' ' * self.last_display_length, end='\r')
+        progress_info = progress_data.get('progress', {})
+        speed_info = progress_data.get('speed', {})
+        timing_info = progress_data.get('timing', {})
+        formatting_info = progress_data.get('formatting', {})
+        percentage = progress_info.get('percentage', 0)
+        downloaded = formatting_info.get('downloaded', '0 B')
+        total_size_raw = progress_info.get('total_size', 0)
+        total_size = formatting_info.get('total_size', 'Unknown')
+        speed = speed_info.get('formatted', '0 B/s')
+        known_total = progress_info.get('known_total', False)
+        # Create progress bar
+        bar_width = 30
+        if known_total and total_size_raw > 0:
+            # Known total size - show actual progress
+            filled_width = int(bar_width * min(percentage, 100) / 100)
+            bar = '#' * filled_width + '-' * (bar_width - filled_width)
+        else:
+            # Unknown total size - show animated progress
+            animation_pos = int(time.time() * 2) % bar_width
+            bar = '-' * animation_pos + '#' + '-' * (bar_width - animation_pos - 1)
+        # Format the progress line
+        status = progress_data.get('status', 'unknown')
+        if status == 'downloading':
+            if known_total:
+                progress_line = f"[{bar}] {percentage:.1f}% | {downloaded}/{total_size} | {speed}"
+            else:
+                progress_line = f"[{bar}] {downloaded} | {speed} | Calculating size..."
+        elif status == 'completed':
+            progress_line = f"[{bar}] 100.0% | {downloaded} | Complete!"
+        elif status == 'error':
+            progress_line = f"Error: {progress_data.get('error_message', 'Unknown error')}"
+        else:
+            progress_line = f"Starting download..."
+        # Display and track length for next clear
+        print(progress_line, end='', flush=True)
+        self.last_display_length = len(progress_line)
+    def _clear_progress_bar(self):
+        """Clear the progress bar display."""
+        if self.show_progress and self.last_display_length > 0:
+            print('\r' + ' ' * self.last_display_length, end='\r')
+            print()  # Move to next line
+            self.last_display_length = 0
+    def _trigger_callback(self):
+        """Trigger the progress callback if one is set."""
+        progress_data = self.get_progress_data()
+        if self.progress_callback:
+            try:
+                self.progress_callback(progress_data)
+            except Exception as e:
+                print(f"Error in progress callback: {e}")
+        # Show custom progress bar only if callback is enabled and show_progress is True
+        if self.progress_callback and self.show_progress:
+            self._display_progress_bar(progress_data)
+    def start_tracking(self):
+        """Start progress tracking (monkey patch tqdm)."""
+        if self.is_tracking:
+            return
+        # Store original methods
+        self.original_tqdm_update = tqdm.update
+        self.original_tqdm_init = tqdm.__init__
+        self.original_tqdm_display = tqdm.display
+        self.original_tqdm_write = tqdm.write
+        # Create references to self for the nested functions
+        tracker = self
+        def patched_init(self_tqdm, *args, **kwargs):
+            # Suppress tqdm display by redirecting to devnull
+            kwargs['file'] = open(os.devnull, 'w')
+            kwargs['disable'] = False  # Keep enabled for tracking
+            kwargs['leave'] = False  # Don't leave progress bar
+            result = tracker.original_tqdm_init(self_tqdm, *args, **kwargs)
+            tracker.register_tqdm(self_tqdm)
+            return result
+        def patched_update(self_tqdm, n=1):
+            result = tracker.original_tqdm_update(self_tqdm, n)
+            tracker.update_progress(self_tqdm, n)
+            return result
+        def patched_display(self_tqdm, msg=None, pos=None):
+            # Override display to show nothing
+            pass
+        def patched_write(self_tqdm, s, file=None, end="\n", nolock=False):
+            # Override write to prevent any output
+            pass
+        # Apply patches
+        tqdm.__init__ = patched_init
+        tqdm.update = patched_update
+        tqdm.display = patched_display
+        tqdm.write = patched_write
+        self.is_tracking = True
+        self.download_status = "downloading"
+        self.download_start_time = time.time()
+        # Trigger initial callback
+        self._trigger_callback()
+    def stop_tracking(self):
+        """Stop progress tracking and restore original tqdm."""
+        if not self.is_tracking:
+            return
+        # Restore original tqdm methods
+        if self.original_tqdm_update:
+            tqdm.update = self.original_tqdm_update
+        if self.original_tqdm_init:
+            tqdm.__init__ = self.original_tqdm_init
+        if hasattr(self, 'original_tqdm_display') and self.original_tqdm_display:
+            tqdm.display = self.original_tqdm_display
+        if hasattr(self, 'original_tqdm_write') and self.original_tqdm_write:
+            tqdm.write = self.original_tqdm_write
+        # Clean up any open devnull file handles from tqdm instances
+        for data in self.progress_data.values():
+            if 'tqdm_obj' in data and hasattr(data['tqdm_obj'], 'fp'):
+                try:
+                    fp = data['tqdm_obj'].fp
+                    if fp and fp != sys.stdout and fp != sys.stderr and not fp.closed:
+                        fp.close()
+                except:
+                    pass
+        self.is_tracking = False
+        if self.download_status == "downloading":
+            self.download_status = "completed"
+        # Trigger final callback and clear progress bar
+        self._trigger_callback()
+        self._clear_progress_bar()
+    def set_error(self, error_message: str):
+        """Set error status and trigger callback."""
+        self.download_status = "error"
+        self.error_message = error_message
+        self._trigger_callback()
+        self._clear_progress_bar()

nexaai/utils/quantization_utils.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""
+Quantization utilities for extracting quantization types from model files and configurations.
+This module provides utilities to extract quantization information from:
+- GGUF model filenames
+- MLX model repository IDs
+- MLX model config.json files
+"""
+import os
+import json
+import re
+import logging
+from enum import Enum
+from typing import Optional
+# Set up logger
+logger = logging.getLogger(__name__)
+class QuantizationType(str, Enum):
+    """Enum for GGUF and MLX model quantization types.
+    Members also subclass ``str``, so each one compares equal to its plain
+    string value (e.g. ``QuantizationType.Q4_0 == "Q4_0"``).
+    """
+    # GGUF quantization types
+    BF16 = "BF16"
+    F16 = "F16"
+    Q2_K = "Q2_K"
+    Q2_K_L = "Q2_K_L"
+    Q3_K = "Q3_K"
+    Q3_K_M = "Q3_K_M"
+    Q3_K_S = "Q3_K_S"
+    Q4_0 = "Q4_0"
+    Q4_1 = "Q4_1"
+    Q4_K = "Q4_K"
+    Q4_K_M = "Q4_K_M"
+    Q4_K_S = "Q4_K_S"
+    Q5_K = "Q5_K"
+    Q5_K_M = "Q5_K_M"
+    Q5_K_S = "Q5_K_S"
+    Q6_K = "Q6_K"
+    Q8_0 = "Q8_0"
+    MXFP4 = "MXFP4"
+    MXFP8 = "MXFP8"
+    # MLX bit-based quantization types
+    # (identifiers cannot start with a digit, hence BIT_<n> for the value "<n>BIT")
+    BIT_1 = "1BIT"
+    BIT_2 = "2BIT"
+    BIT_3 = "3BIT"
+    BIT_4 = "4BIT"
+    BIT_5 = "5BIT"
+    BIT_6 = "6BIT"
+    BIT_7 = "7BIT"
+    BIT_8 = "8BIT"
+    BIT_16 = "16BIT"
+def extract_quantization_from_filename(filename: str) -> Optional[QuantizationType]:
+    """
+    Extract quantization type from filename.
+    Args:
+        filename: The filename to extract quantization from
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    # Define mapping from lowercase patterns to enum values
+    # Include "." to ensure precise matching (e.g., "q4_0." not "q4_0_xl")
+    pattern_to_enum = {
+        'bf16.': QuantizationType.BF16,
+        'f16.': QuantizationType.F16,  # Add F16 support
+        'q2_k_l.': QuantizationType.Q2_K_L,  # Check Q2_K_L before Q2_K to avoid partial match
+        'q2_k.': QuantizationType.Q2_K,
+        'q3_k.': QuantizationType.Q3_K,
+        'q3_k_m.': QuantizationType.Q3_K_M,
+        'q3_k_s.': QuantizationType.Q3_K_S,
+        'q4_k_m.': QuantizationType.Q4_K_M,
+        'q4_k_s.': QuantizationType.Q4_K_S,
+        'q4_0.': QuantizationType.Q4_0,
+        'q4_1.': QuantizationType.Q4_1,
+        'q4_k.': QuantizationType.Q4_K,
+        'q5_k.': QuantizationType.Q5_K,
+        'q5_k_m.': QuantizationType.Q5_K_M,
+        'q5_k_s.': QuantizationType.Q5_K_S,
+        'q6_k.': QuantizationType.Q6_K,
+        'q8_0.': QuantizationType.Q8_0,
+        'mxfp4.': QuantizationType.MXFP4,
+        'mxfp8.': QuantizationType.MXFP8,
+    }
+    filename_lower = filename.lower()
+    # Check longer patterns first to avoid partial matches
+    # Sort by length descending to check q2_k_l before q2_k, q4_k_m before q4_0, etc.
+    for pattern in sorted(pattern_to_enum.keys(), key=len, reverse=True):
+        if pattern in filename_lower:
+            return pattern_to_enum[pattern]
+    return None
+def extract_quantization_from_repo_id(repo_id: str) -> Optional[QuantizationType]:
+    """
+    Extract quantization type from repo_id for MLX models by looking for bit patterns.
+    Args:
+        repo_id: The repository ID to extract quantization from
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    # Define mapping from bit numbers to enum values
+    bit_to_enum = {
+        1: QuantizationType.BIT_1,
+        2: QuantizationType.BIT_2,
+        3: QuantizationType.BIT_3,
+        4: QuantizationType.BIT_4,
+        5: QuantizationType.BIT_5,
+        6: QuantizationType.BIT_6,
+        7: QuantizationType.BIT_7,
+        8: QuantizationType.BIT_8,
+        16: QuantizationType.BIT_16,
+    }
+    # First check for patterns like "4bit", "8bit" etc. (case insensitive)
+    pattern = r'(\d+)bit'
+    matches = re.findall(pattern, repo_id.lower())
+    for match in matches:
+        try:
+            bit_number = int(match)
+            if bit_number in bit_to_enum:
+                logger.debug(f"Found {bit_number}bit quantization in repo_id: {repo_id}")
+                return bit_to_enum[bit_number]
+        except ValueError:
+            continue
+    # Also check for patterns like "-q8", "_Q4" etc.
+    q_pattern = r'[-_]q(\d+)'
+    q_matches = re.findall(q_pattern, repo_id.lower())
+    for match in q_matches:
+        try:
+            bit_number = int(match)
+            if bit_number in bit_to_enum:
+                logger.debug(f"Found Q{bit_number} quantization in repo_id: {repo_id}")
+                return bit_to_enum[bit_number]
+        except ValueError:
+            continue
+    return None
+def extract_quantization_from_mlx_config(mlx_folder_path: str) -> Optional[QuantizationType]:
+    """
+    Extract quantization type from MLX model's config.json file.
+    Args:
+        mlx_folder_path: Path to the MLX model folder
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    config_path = os.path.join(mlx_folder_path, "config.json")
+    if not os.path.exists(config_path):
+        logger.debug(f"Config file not found: {config_path}")
+        return None
+    try:
+        with open(config_path, 'r', encoding='utf-8') as f:
+            config = json.load(f)
+        # Look for quantization.bits field
+        quantization_config = config.get("quantization", {})
+        if isinstance(quantization_config, dict):
+            bits = quantization_config.get("bits")
+            if isinstance(bits, int):
+                # Define mapping from bit numbers to enum values
+                bit_to_enum = {
+                    1: QuantizationType.BIT_1,
+                    2: QuantizationType.BIT_2,
+                    3: QuantizationType.BIT_3,
+                    4: QuantizationType.BIT_4,
+                    5: QuantizationType.BIT_5,
+                    6: QuantizationType.BIT_6,
+                    7: QuantizationType.BIT_7,
+                    8: QuantizationType.BIT_8,
+                    16: QuantizationType.BIT_16,
+                }
+                if bits in bit_to_enum:
+                    logger.debug(f"Found {bits}bit quantization in config.json: {config_path}")
+                    return bit_to_enum[bits]
+                else:
+                    logger.debug(f"Unsupported quantization bits value: {bits}")
+    except (json.JSONDecodeError, IOError) as e:
+        logger.warning(f"Error reading config.json from {config_path}: {e}")
+    except Exception as e:
+        logger.warning(f"Unexpected error reading config.json from {config_path}: {e}")
+    return None
+def extract_gguf_quantization(filename: str) -> str:
+    """
+    Extract quantization level from GGUF filename using the enum-based approach.
+    This function provides backward compatibility by returning a string representation
+    of the quantization type.
+    Args:
+        filename: The GGUF filename
+    Returns:
+        String representation of the quantization type or "UNKNOWN" if not found
+    """
+    quantization_type = extract_quantization_from_filename(filename)
+    if quantization_type:
+        return quantization_type.value
+    return "UNKNOWN"
+def detect_quantization_for_mlx(repo_id: str, directory_path: str) -> Optional[QuantizationType]:
+    """
+    Detect quantization for MLX models using multiple methods in priority order.
+    Args:
+        repo_id: The repository ID
+        directory_path: Path to the model directory
+    Returns:
+        QuantizationType enum value or None if not found
+    """
+    # Method 1: Extract from repo_id
+    quantization_type = extract_quantization_from_repo_id(repo_id)
+    if quantization_type:
+        return quantization_type
+    # Method 2: Extract from config.json if available
+    quantization_type = extract_quantization_from_mlx_config(directory_path)
+    if quantization_type:
+        return quantization_type
+    return None