PyPI - cortex-llm - Versions diffs - 1.0.0__py3-none-any.whl - Mend

cortex-llm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

cortex/__init__.py +73 -0
cortex/__main__.py +83 -0
cortex/config.py +329 -0
cortex/conversation_manager.py +468 -0
cortex/fine_tuning/__init__.py +8 -0
cortex/fine_tuning/dataset.py +332 -0
cortex/fine_tuning/mlx_lora_trainer.py +502 -0
cortex/fine_tuning/trainer.py +957 -0
cortex/fine_tuning/wizard.py +707 -0
cortex/gpu_validator.py +467 -0
cortex/inference_engine.py +727 -0
cortex/metal/__init__.py +275 -0
cortex/metal/gpu_validator.py +177 -0
cortex/metal/memory_pool.py +886 -0
cortex/metal/mlx_accelerator.py +678 -0
cortex/metal/mlx_converter.py +638 -0
cortex/metal/mps_optimizer.py +417 -0
cortex/metal/optimizer.py +665 -0
cortex/metal/performance_profiler.py +364 -0
cortex/model_downloader.py +130 -0
cortex/model_manager.py +2187 -0
cortex/quantization/__init__.py +5 -0
cortex/quantization/dynamic_quantizer.py +736 -0
cortex/template_registry/__init__.py +15 -0
cortex/template_registry/auto_detector.py +144 -0
cortex/template_registry/config_manager.py +234 -0
cortex/template_registry/interactive.py +260 -0
cortex/template_registry/registry.py +347 -0
cortex/template_registry/template_profiles/__init__.py +5 -0
cortex/template_registry/template_profiles/base.py +142 -0
cortex/template_registry/template_profiles/complex/__init__.py +5 -0
cortex/template_registry/template_profiles/complex/reasoning.py +263 -0
cortex/template_registry/template_profiles/standard/__init__.py +9 -0
cortex/template_registry/template_profiles/standard/alpaca.py +73 -0
cortex/template_registry/template_profiles/standard/chatml.py +82 -0
cortex/template_registry/template_profiles/standard/gemma.py +103 -0
cortex/template_registry/template_profiles/standard/llama.py +87 -0
cortex/template_registry/template_profiles/standard/simple.py +65 -0
cortex/ui/__init__.py +120 -0
cortex/ui/cli.py +1685 -0
cortex/ui/markdown_render.py +185 -0
cortex/ui/terminal_app.py +534 -0
cortex_llm-1.0.0.dist-info/METADATA +275 -0
cortex_llm-1.0.0.dist-info/RECORD +48 -0
cortex_llm-1.0.0.dist-info/WHEEL +5 -0
cortex_llm-1.0.0.dist-info/entry_points.txt +2 -0
cortex_llm-1.0.0.dist-info/licenses/LICENSE +21 -0
cortex_llm-1.0.0.dist-info/top_level.txt +1 -0

cortex/metal/__init__.py ADDED

@@ -0,0 +1,275 @@
+"""Metal optimization package for GPU acceleration on Apple Silicon.
+This package provides unified GPU acceleration for LLM inference on Apple Silicon.
+The recommended approach is to use MetalOptimizer for automatic backend selection.
+"""
+from typing import Dict, Any, Optional
+import platform
+import subprocess
+# Public API of the package, grouped by role
+__all__ = [
+    # Unified optimizer: the recommended entry point
+    "MetalOptimizer",
+    "OptimizationConfig",
+    "Backend",
+    "InferenceSession",
+    # Capability detection helpers defined in this module
+    "MetalCapabilities",
+    "check_metal_support",
+    "get_metal_version",
+    "initialize_metal_optimizations",
+    # Memory management
+    "MemoryPool",
+    # Individual backends (most callers should go through MetalOptimizer)
+    "MPSOptimizer",
+    "MLXAccelerator",
+    # Performance monitoring
+    "PerformanceProfiler",
+]
+class MetalCapabilities:
+    """Metal capabilities detection and management.
+    All members are class-level: two static lookup tables, a chip detector
+    that shells out to ``system_profiler``, and a per-chip tuning profile
+    lookup. The class is never instantiated by the code in this module.
+    NOTE(review): the ``appleN`` labels used here (apple5..apple8 for M1..M4)
+    are package-internal identifiers. They do not follow Apple's
+    ``MTLGPUFamily`` numbering (where, for example, M1 belongs to Apple7), so
+    do not pass them to Metal APIs.
+    """
+    # Feature names reported for each Metal generation by detect_capabilities().
+    METAL_FEATURES = {
+        "metal3": {
+            "min_macos": "14.0",
+            "features": [
+                "mesh_shaders",
+                "function_pointers",
+                "ray_tracing",
+                "indirect_command_buffers",
+                "gpu_driven_pipeline"
+            ]
+        },
+        "metal2": {
+            "min_macos": "10.13",
+            "features": [
+                "argument_buffers",
+                "programmable_sample_positions",
+                "texture_read_write"
+            ]
+        }
+    }
+    # Build flags exposed as data; nothing in this class reads them.
+    APPLE_SILICON_OPTIMIZATION_FLAGS = {
+        "compiler_flags": [
+            "-O3",
+            "-ffast-math",
+            "-march=armv8.5-a+fp16+dotprod",
+            "-mtune=apple-silicon"
+        ],
+        "metal_compiler_flags": [
+            # Use macOS-appropriate Metal standard version
+            "-std=metal3.1",
+            "-O3",
+            "-ffast-math"
+        ],
+        "linker_flags": [
+            "-framework", "Metal",
+            "-framework", "MetalPerformanceShaders",
+            "-framework", "MetalPerformanceShadersGraph"
+        ]
+    }
+    # (marker in lower-cased system_profiler output, package GPU-family label).
+    # Checked in order, newest chip first.
+    _CHIP_FAMILIES = (
+        ("apple m4", "apple8"),
+        ("apple m3", "apple7"),
+        ("apple m2", "apple6"),
+        ("apple m1", "apple5"),
+    )
+    @classmethod
+    def detect_capabilities(cls) -> Dict[str, Any]:
+        """Detect Metal capabilities on the system.
+        Returns:
+            On macOS, a dict with the keys ``supported`` (True), ``version``
+            (result of ``get_metal_version()``, may be None), ``features``
+            (list of feature names, empty when the version is unknown or
+            "Metal 1"), ``optimizations`` (tuning profile for the detected
+            chip) and ``gpu_family``. On any other platform,
+            ``{"supported": False, "error": "Not running on macOS"}``.
+        """
+        if platform.system() != "Darwin":
+            return {"supported": False, "error": "Not running on macOS"}
+        capabilities = {
+            "supported": True,
+            "version": get_metal_version(),
+            "features": [],
+            "optimizations": {},
+            "gpu_family": cls._detect_gpu_family()
+        }
+        metal_version = capabilities["version"]
+        # Copy the feature list so a caller that mutates the returned dict
+        # cannot alter the shared class-level METAL_FEATURES table.
+        if metal_version and "Metal 3" in metal_version:
+            capabilities["features"] = list(cls.METAL_FEATURES["metal3"]["features"])
+        elif metal_version and "Metal 2" in metal_version:
+            capabilities["features"] = list(cls.METAL_FEATURES["metal2"]["features"])
+        # Always assign optimization profile based on detected GPU family
+        capabilities["optimizations"] = cls.get_optimization_profile(capabilities["gpu_family"])
+        return capabilities
+    @classmethod
+    def _detect_gpu_family(cls) -> str:
+        """Detect GPU family (apple5, apple6, apple7, apple8 for M1, M2, M3, M4).
+        Runs ``system_profiler SPDisplaysDataType`` and looks for an
+        "Apple M<n>" chip name in its output.
+        Returns:
+            The package-internal family label, or "unknown" when the command
+            cannot be run, exits non-zero, or names no recognised chip.
+        """
+        try:
+            result = subprocess.run(
+                ["system_profiler", "SPDisplaysDataType"],
+                capture_output=True,
+                text=True,
+                check=True
+            )
+        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
+            # Best effort: a missing tool, a failing command or undecodable
+            # output all mean "cannot tell". KeyboardInterrupt and SystemExit
+            # are deliberately not swallowed.
+            return "unknown"
+        output = result.stdout.lower()
+        for marker, family in cls._CHIP_FAMILIES:
+            if marker in output:
+                return family
+        return "unknown"
+    @classmethod
+    def get_optimization_profile(cls, gpu_family: str) -> Dict[str, Any]:
+        """Get optimization profile for specific GPU family.
+        Args:
+            gpu_family: Label as returned by ``_detect_gpu_family()``.
+        Returns:
+            A freshly built dict of tuning parameters. Any label without its
+            own entry (including "unknown") gets the "default" profile.
+        """
+        profiles = {
+            "apple8": {  # M4
+                "max_threads_per_threadgroup": 1024,
+                "max_total_threadgroup_memory": 32768,
+                "simd_width": 32,
+                "preferred_batch_size": 8,
+                "use_fused_operations": True,
+                "use_fast_math": True,
+                "tile_size": (16, 16),
+                "wave_size": 32,
+                "prefer_bfloat16": True,
+            },
+            "apple7": {  # M3
+                "max_threads_per_threadgroup": 1024,
+                "max_total_threadgroup_memory": 32768,
+                "simd_width": 32,
+                "preferred_batch_size": 4,
+                "use_fused_operations": True,
+                "use_fast_math": True,
+                "tile_size": (8, 8),
+                "wave_size": 32,
+                "prefer_bfloat16": True,
+            },
+            "apple6": {  # M2
+                "max_threads_per_threadgroup": 1024,
+                "max_total_threadgroup_memory": 32768,
+                "simd_width": 32,
+                "preferred_batch_size": 4,
+                "use_fused_operations": True,
+                "use_fast_math": True,
+                "tile_size": (8, 8),
+                "wave_size": 32,
+                "prefer_bfloat16": True,
+            },
+            "apple5": {  # M1
+                "max_threads_per_threadgroup": 1024,
+                "max_total_threadgroup_memory": 32768,
+                "simd_width": 32,
+                "preferred_batch_size": 2,
+                "use_fused_operations": False,
+                "use_fast_math": True,
+                "tile_size": (8, 8),
+                "wave_size": 32,
+                "prefer_bfloat16": False,
+            },
+            "default": {
+                "max_threads_per_threadgroup": 512,
+                "max_total_threadgroup_memory": 16384,
+                "simd_width": 32,
+                "preferred_batch_size": 2,
+                "use_fused_operations": False,
+                "use_fast_math": False,
+                "tile_size": (8, 8),
+                "wave_size": 32,
+                "prefer_bfloat16": False,
+            }
+        }
+        return profiles.get(gpu_family, profiles["default"])
+def check_metal_support() -> bool:
+    """Check if Metal is supported on this system.
+    Returns:
+        True only on macOS when ``system_profiler SPDisplaysDataType`` runs
+        successfully and its output contains the word "Metal". False on any
+        other platform, or when the command cannot be run or exits non-zero.
+    """
+    if platform.system() != "Darwin":
+        return False
+    try:
+        result = subprocess.run(
+            ["system_profiler", "SPDisplaysDataType"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
+        # Best effort: treat a missing or failing tool as "no Metal", but let
+        # KeyboardInterrupt and SystemExit propagate.
+        return False
+    return "Metal" in result.stdout
+def get_metal_version() -> Optional[str]:
+    """Get Metal version string.
+    The version is not queried from Metal itself. It is inferred from the
+    major version printed by ``xcrun --show-sdk-version``: 14 or newer maps
+    to "Metal 3", 10 or newer to "Metal 2", anything older to "Metal 1".
+    Returns:
+        One of "Metal 3", "Metal 2", "Metal 1", or None when ``xcrun`` cannot
+        be run, exits non-zero, or prints something that is not a version.
+    """
+    try:
+        result = subprocess.run(
+            ["xcrun", "--show-sdk-version"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        sdk_version = result.stdout.strip()
+        major_version = int(sdk_version.split('.')[0])
+    except (OSError, subprocess.SubprocessError, ValueError):
+        # OSError: xcrun missing; SubprocessError: non-zero exit;
+        # ValueError: empty, undecodable or non-numeric output.
+        return None
+    if major_version >= 14:
+        return "Metal 3"
+    if major_version >= 10:
+        return "Metal 2"
+    return "Metal 1"
+def initialize_metal_optimizations() -> Dict[str, Any]:
+    """Collect Metal capabilities and the matching tuning profile.
+    Returns:
+        Dict with the keys ``capabilities``, ``optimization_profile``,
+        ``gpu_family`` and ``metal_version``.
+    Raises:
+        RuntimeError: If ``check_metal_support()`` is false, or capability
+            detection reports the system as unsupported.
+    """
+    if not check_metal_support():
+        raise RuntimeError("Metal is not supported on this system")
+    detected = MetalCapabilities.detect_capabilities()
+    if not detected["supported"]:
+        reason = detected.get('error', 'Unknown error')
+        raise RuntimeError(f"Metal not supported: {reason}")
+    family = detected["gpu_family"]
+    profile = MetalCapabilities.get_optimization_profile(family)
+    summary: Dict[str, Any] = {}
+    summary["capabilities"] = detected
+    summary["optimization_profile"] = profile
+    summary["gpu_family"] = family
+    summary["metal_version"] = detected["version"]
+    return summary
+# Import new unified optimizer (RECOMMENDED)
+# The import is guarded so that this package can still be imported when
+# cortex.metal.optimizer, or something it imports, is unavailable.
+try:
+    from cortex.metal.optimizer import (
+        MetalOptimizer,
+        OptimizationConfig,
+        Backend,
+        InferenceSession
+    )
+except ImportError as e:
+    # If optimizer fails to import, provide a helpful message
+    import warnings
+    # NOTE(review): ImportWarning is ignored by Python's default warning
+    # filters, so this message is only visible when warnings are enabled
+    # (for example with ``python -W default``). Confirm that is intended.
+    warnings.warn(
+        f"Failed to import MetalOptimizer: {e}. "
+        "Some dependencies may be missing.",
+        ImportWarning
+    )
+    # Bind the exported names to None so that importing them from this
+    # package still succeeds; callers must check for None before use.
+    MetalOptimizer = None
+    OptimizationConfig = None
+    Backend = None
+    InferenceSession = None
+# Import existing components
+# Unlike the optimizer above, these imports are not guarded: an ImportError
+# raised by any of them propagates to whoever imports this package.
+from cortex.metal.memory_pool import MemoryPool
+from cortex.metal.mps_optimizer import MPSOptimizer
+from cortex.metal.mlx_accelerator import MLXAccelerator
+from cortex.metal.performance_profiler import PerformanceProfiler

cortex/metal/gpu_validator.py ADDED

@@ -0,0 +1,177 @@
+"""GPU validation and capability detection for Metal."""
+import subprocess
+import platform
+from dataclasses import dataclass
+from typing import Optional
+@dataclass
+class GPUInfo:
+    """GPU information and capabilities.
+    Plain record filled in by GPUValidator._detect_gpu(). The defaults
+    describe a GPU about which nothing has been detected: unknown family,
+    every capability flag off.
+    """
+    # Package-internal chip label, not Apple's MTLGPUFamily numbering.
+    gpu_family: str = "unknown"  # apple5 (M1), apple6 (M2), apple7 (M3), apple8 (M4)
+    # Set for M2, M3 and M4 by the detector in this file.
+    supports_bfloat16: bool = False
+    # The detector in this file always leaves this False.
+    supports_simdgroup_matrix: bool = False
+    # Set for M3 and M4 by the detector in this file.
+    supports_tile_functions: bool = False
+    # Set for every Apple Silicon chip by the detector in this file.
+    # NOTE(review): what "mpp" stands for is not stated here; confirm.
+    supports_mpp: bool = False
+    # True when the detector found an Apple chip.
+    is_apple_silicon: bool = False
+    # Metal version string recommended for this GPU ("3.0" or "3.1").
+    metal_version: str = "3.0"
+class GPUValidator:
+    """Validates GPU capabilities for Metal optimization.
+    Detection shells out to ``system_profiler`` (with ``sysctl`` as a
+    fallback) and is macOS-only. The accessor methods run ``validate()``
+    on demand whenever no GPU information is held yet.
+    """
+    # Per-chip traits, checked in order, newest chip first. Columns:
+    # (marker in lower-cased system_profiler output, gpu_family,
+    #  supports_bfloat16, supports_tile_functions, metal_version)
+    _CHIP_TRAITS = (
+        ("apple m4", "apple8", True, True, "3.1"),
+        ("apple m3", "apple7", True, True, "3.1"),
+        ("apple m2", "apple6", True, False, "3.0"),
+        ("apple m1", "apple5", False, False, "3.0"),
+    )
+    def __init__(self):
+        """Initialize GPU validator."""
+        # GPUInfo of the last successful detection, or None.
+        self.gpu_info = None
+        self.validation_passed = False
+    def validate(self) -> bool:
+        """
+        Validate GPU and detect capabilities.
+        Returns:
+            True if GPU is validated and ready
+        """
+        self.gpu_info = self._detect_gpu()
+        self.validation_passed = self.gpu_info is not None
+        return self.validation_passed
+    def _detect_gpu(self) -> Optional[GPUInfo]:
+        """
+        Detect GPU model and capabilities.
+        Returns:
+            GPUInfo object with detected capabilities. When
+            ``system_profiler`` succeeds, an info object is returned even
+            for a non-Apple-Silicon Mac (family "unknown", flags off).
+            None when not on macOS, or when ``system_profiler`` is
+            unusable and ``sysctl`` does not report an Apple CPU.
+        """
+        if platform.system() != "Darwin":
+            return None
+        info = GPUInfo()
+        profiler_output = self._read_system_profiler()
+        if profiler_output is not None:
+            self._apply_profiler_output(info, profiler_output)
+            return info
+        # Fallback detection: system_profiler timed out, could not be run or
+        # exited non-zero, so ask sysctl for the CPU brand instead.
+        if self._sysctl_reports_apple_cpu():
+            info.is_apple_silicon = True
+            info.gpu_family = "apple5"  # Conservative default
+            return info
+        return None
+    def _read_system_profiler(self) -> Optional[str]:
+        """Return lower-cased ``system_profiler`` display output, or None on failure."""
+        try:
+            result = subprocess.run(
+                ["system_profiler", "SPDisplaysDataType"],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
+            # TimeoutExpired is a SubprocessError. KeyboardInterrupt and
+            # SystemExit are deliberately not swallowed.
+            return None
+        if result.returncode != 0:
+            return None
+        return result.stdout.lower()
+    def _match_chip(self, output: str):
+        """Return the _CHIP_TRAITS row whose marker occurs in *output*, or None.
+        The full "apple m<n>" chip name is matched, not a bare "m<n>": the
+        output also lists attached displays, whose model names can contain
+        such substrings (for example a monitor called "M32U").
+        """
+        for traits in self._CHIP_TRAITS:
+            if traits[0] in output:
+                return traits
+        return None
+    def _apply_profiler_output(self, info, output: str) -> None:
+        """Fill *info* in place from lower-cased ``system_profiler`` output."""
+        if "apple m" not in output and "apple silicon" not in output:
+            return
+        info.is_apple_silicon = True
+        traits = self._match_chip(output)
+        if traits is not None:
+            _, family, bfloat16, tile_functions, metal_version = traits
+            info.gpu_family = family
+            info.supports_bfloat16 = bfloat16
+            info.supports_tile_functions = tile_functions
+            info.metal_version = metal_version
+        # All Apple Silicon supports SIMD operations
+        info.supports_simdgroup_matrix = False  # Not in public API
+        info.supports_mpp = True
+    def _sysctl_reports_apple_cpu(self) -> bool:
+        """Return True if ``sysctl`` reports a CPU brand string containing "Apple"."""
+        try:
+            result = subprocess.run(
+                ["sysctl", "-n", "machdep.cpu.brand_string"],
+                capture_output=True,
+                text=True
+            )
+        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
+            return False
+        return "Apple" in result.stdout
+    def _current_info(self):
+        """Return the held GPU info, running validate() first if there is none."""
+        if not self.gpu_info:
+            self.validate()
+        return self.gpu_info
+    def check_bfloat16_support(self) -> bool:
+        """
+        Check if current GPU supports bfloat16.
+        Returns:
+            True if bfloat16 is supported
+        """
+        info = self._current_info()
+        return info.supports_bfloat16 if info else False
+    def get_gpu_family(self) -> str:
+        """
+        Get GPU family identifier.
+        Returns:
+            GPU family string (apple5, apple6, etc.), or "unknown"
+        """
+        info = self._current_info()
+        return info.gpu_family if info else "unknown"
+    def get_metal_version(self) -> str:
+        """
+        Get recommended Metal version for this GPU.
+        Returns:
+            Metal version string ("3.0" when no GPU was detected)
+        """
+        info = self._current_info()
+        return info.metal_version if info else "3.0"
+    def get_capabilities_summary(self) -> dict:
+        """
+        Get summary of GPU capabilities.
+        Returns:
+            Dictionary with capability flags
+        """
+        info = self._current_info()
+        if info:
+            return {
+                "gpu_family": info.gpu_family,
+                "is_apple_silicon": info.is_apple_silicon,
+                "supports_bfloat16": info.supports_bfloat16,
+                "metal_version": info.metal_version,
+                "validation_passed": self.validation_passed
+            }
+        return {
+            "gpu_family": "unknown",
+            "is_apple_silicon": False,
+            "supports_bfloat16": False,
+            "metal_version": "3.0",
+            "validation_passed": False
+        }
+# Module-level shortcut around GPUValidator for one-off checks
+def validate_gpu() -> bool:
+    """Run GPU validation on a throwaway GPUValidator and return its result."""
+    return GPUValidator().validate()