cortex-llm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. cortex/__init__.py +73 -0
  2. cortex/__main__.py +83 -0
  3. cortex/config.py +329 -0
  4. cortex/conversation_manager.py +468 -0
  5. cortex/fine_tuning/__init__.py +8 -0
  6. cortex/fine_tuning/dataset.py +332 -0
  7. cortex/fine_tuning/mlx_lora_trainer.py +502 -0
  8. cortex/fine_tuning/trainer.py +957 -0
  9. cortex/fine_tuning/wizard.py +707 -0
  10. cortex/gpu_validator.py +467 -0
  11. cortex/inference_engine.py +727 -0
  12. cortex/metal/__init__.py +275 -0
  13. cortex/metal/gpu_validator.py +177 -0
  14. cortex/metal/memory_pool.py +886 -0
  15. cortex/metal/mlx_accelerator.py +678 -0
  16. cortex/metal/mlx_converter.py +638 -0
  17. cortex/metal/mps_optimizer.py +417 -0
  18. cortex/metal/optimizer.py +665 -0
  19. cortex/metal/performance_profiler.py +364 -0
  20. cortex/model_downloader.py +130 -0
  21. cortex/model_manager.py +2187 -0
  22. cortex/quantization/__init__.py +5 -0
  23. cortex/quantization/dynamic_quantizer.py +736 -0
  24. cortex/template_registry/__init__.py +15 -0
  25. cortex/template_registry/auto_detector.py +144 -0
  26. cortex/template_registry/config_manager.py +234 -0
  27. cortex/template_registry/interactive.py +260 -0
  28. cortex/template_registry/registry.py +347 -0
  29. cortex/template_registry/template_profiles/__init__.py +5 -0
  30. cortex/template_registry/template_profiles/base.py +142 -0
  31. cortex/template_registry/template_profiles/complex/__init__.py +5 -0
  32. cortex/template_registry/template_profiles/complex/reasoning.py +263 -0
  33. cortex/template_registry/template_profiles/standard/__init__.py +9 -0
  34. cortex/template_registry/template_profiles/standard/alpaca.py +73 -0
  35. cortex/template_registry/template_profiles/standard/chatml.py +82 -0
  36. cortex/template_registry/template_profiles/standard/gemma.py +103 -0
  37. cortex/template_registry/template_profiles/standard/llama.py +87 -0
  38. cortex/template_registry/template_profiles/standard/simple.py +65 -0
  39. cortex/ui/__init__.py +120 -0
  40. cortex/ui/cli.py +1685 -0
  41. cortex/ui/markdown_render.py +185 -0
  42. cortex/ui/terminal_app.py +534 -0
  43. cortex_llm-1.0.0.dist-info/METADATA +275 -0
  44. cortex_llm-1.0.0.dist-info/RECORD +48 -0
  45. cortex_llm-1.0.0.dist-info/WHEEL +5 -0
  46. cortex_llm-1.0.0.dist-info/entry_points.txt +2 -0
  47. cortex_llm-1.0.0.dist-info/licenses/LICENSE +21 -0
  48. cortex_llm-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,275 @@
1
+ """Metal optimization package for GPU acceleration on Apple Silicon.
2
+
3
+ This package provides unified GPU acceleration for LLM inference on Apple Silicon.
4
+ The recommended approach is to use MetalOptimizer for automatic backend selection.
5
+ """
6
+
7
+ from typing import Dict, Any, Optional
8
+ import platform
9
+ import subprocess
10
+
11
+ # Primary exports
12
+ __all__ = [
13
+ # Unified optimizer (RECOMMENDED)
14
+ "MetalOptimizer",
15
+ "OptimizationConfig",
16
+ "Backend",
17
+ "InferenceSession",
18
+
19
+ # Core functionality
20
+ "MetalCapabilities",
21
+ "check_metal_support",
22
+ "get_metal_version",
23
+ "initialize_metal_optimizations",
24
+
25
+ # Memory management
26
+ "MemoryPool",
27
+
28
+ # Backend-specific (use MetalOptimizer instead for most cases)
29
+ "MPSOptimizer",
30
+ "MLXAccelerator",
31
+
32
+ # Performance monitoring
33
+ "PerformanceProfiler"
34
+ ]
35
+
36
class MetalCapabilities:
    """Metal capabilities detection and management.

    All detection is best-effort: it shells out to macOS command-line tools
    and degrades to "unknown"/default values when they are unavailable.
    """

    # Feature names advertised for each Metal generation this package knows.
    METAL_FEATURES = {
        "metal3": {
            "min_macos": "14.0",
            "features": [
                "mesh_shaders",
                "function_pointers",
                "ray_tracing",
                "indirect_command_buffers",
                "gpu_driven_pipeline",
            ],
        },
        "metal2": {
            "min_macos": "10.13",
            "features": [
                "argument_buffers",
                "programmable_sample_positions",
                "texture_read_write",
            ],
        },
    }

    APPLE_SILICON_OPTIMIZATION_FLAGS = {
        "compiler_flags": [
            "-O3",
            "-ffast-math",
            "-march=armv8.5-a+fp16+dotprod",
            "-mtune=apple-silicon",
        ],
        "metal_compiler_flags": [
            # Use macOS-appropriate Metal standard version
            "-std=metal3.1",
            "-O3",
            "-ffast-math",
        ],
        "linker_flags": [
            "-framework", "Metal",
            "-framework", "MetalPerformanceShaders",
            "-framework", "MetalPerformanceShadersGraph",
        ],
    }

    # (marker searched in lower-cased system_profiler output, family label).
    # Checked in order, newest chip first.
    _CHIP_FAMILIES = (
        ("apple m4", "apple8"),
        ("apple m3", "apple7"),
        ("apple m2", "apple6"),
        ("apple m1", "apple5"),
    )

    @classmethod
    def detect_capabilities(cls) -> Dict[str, Any]:
        """Detect Metal capabilities on the system.

        Returns:
            ``{"supported": False, "error": ...}`` when not running on macOS;
            otherwise a dict with the keys ``supported``, ``version``,
            ``features``, ``optimizations`` and ``gpu_family``.
        """
        if platform.system() != "Darwin":
            return {"supported": False, "error": "Not running on macOS"}

        capabilities = {
            "supported": True,
            "version": get_metal_version(),
            "features": [],
            "optimizations": {},
            "gpu_family": cls._detect_gpu_family(),
        }

        metal_version = capabilities["version"]
        # Copy the feature lists so callers that mutate the result cannot
        # alter the class-level METAL_FEATURES table.
        if metal_version and "Metal 3" in metal_version:
            capabilities["features"] = list(cls.METAL_FEATURES["metal3"]["features"])
        elif metal_version and "Metal 2" in metal_version:
            capabilities["features"] = list(cls.METAL_FEATURES["metal2"]["features"])

        # Always assign optimization profile based on detected GPU family
        capabilities["optimizations"] = cls.get_optimization_profile(
            capabilities["gpu_family"]
        )

        return capabilities

    @classmethod
    def _detect_gpu_family(cls) -> str:
        """Detect GPU family (apple5, apple6, apple7, apple8 for M1, M2, M3, M4).

        Returns:
            One of the family labels above, or ``"unknown"`` when the chip is
            not recognised or ``system_profiler`` cannot be run.

        NOTE(review): these labels are this package's own naming; confirm
        before treating them as Apple's MTLGPUFamily identifiers.
        """
        try:
            result = subprocess.run(
                ["system_profiler", "SPDisplaysDataType"],
                capture_output=True,
                text=True,
                check=True,
            )
        except (OSError, subprocess.SubprocessError, ValueError):
            # Tool missing, non-zero exit, or undecodable output: stay
            # best-effort, but let KeyboardInterrupt/SystemExit propagate.
            return "unknown"

        output = result.stdout.lower()
        for marker, family in cls._CHIP_FAMILIES:
            if marker in output:
                return family
        return "unknown"

    @classmethod
    def get_optimization_profile(cls, gpu_family: str) -> Dict[str, Any]:
        """Get optimization profile for specific GPU family.

        Args:
            gpu_family: Family label such as ``"apple8"``.

        Returns:
            The tuning dict for that family, or the conservative ``"default"``
            profile for any label that is not listed. A new dict is built on
            every call.
        """
        profiles = {
            "apple8": {  # M4
                "max_threads_per_threadgroup": 1024,
                "max_total_threadgroup_memory": 32768,
                "simd_width": 32,
                "preferred_batch_size": 8,
                "use_fused_operations": True,
                "use_fast_math": True,
                "tile_size": (16, 16),
                "wave_size": 32,
                "prefer_bfloat16": True,
            },
            "apple7": {  # M3
                "max_threads_per_threadgroup": 1024,
                "max_total_threadgroup_memory": 32768,
                "simd_width": 32,
                "preferred_batch_size": 4,
                "use_fused_operations": True,
                "use_fast_math": True,
                "tile_size": (8, 8),
                "wave_size": 32,
                "prefer_bfloat16": True,
            },
            "apple6": {  # M2
                "max_threads_per_threadgroup": 1024,
                "max_total_threadgroup_memory": 32768,
                "simd_width": 32,
                "preferred_batch_size": 4,
                "use_fused_operations": True,
                "use_fast_math": True,
                "tile_size": (8, 8),
                "wave_size": 32,
                "prefer_bfloat16": True,
            },
            "apple5": {  # M1
                "max_threads_per_threadgroup": 1024,
                "max_total_threadgroup_memory": 32768,
                "simd_width": 32,
                "preferred_batch_size": 2,
                "use_fused_operations": False,
                "use_fast_math": True,
                "tile_size": (8, 8),
                "wave_size": 32,
                "prefer_bfloat16": False,
            },
            "default": {
                "max_threads_per_threadgroup": 512,
                "max_total_threadgroup_memory": 16384,
                "simd_width": 32,
                "preferred_batch_size": 2,
                "use_fused_operations": False,
                "use_fast_math": False,
                "tile_size": (8, 8),
                "wave_size": 32,
                "prefer_bfloat16": False,
            },
        }

        return profiles.get(gpu_family, profiles["default"])
192
+
193
def check_metal_support() -> bool:
    """Check if Metal is supported on this system.

    Returns:
        True when running on macOS and the ``system_profiler
        SPDisplaysDataType`` report mentions "Metal"; False otherwise,
        including when the tool cannot be run or exits with an error.
    """
    if platform.system() != "Darwin":
        return False

    try:
        result = subprocess.run(
            ["system_profiler", "SPDisplaysDataType"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best-effort probe: a missing tool, failing exit status or
        # undecodable output means "not supported", but interpreter-level
        # signals such as KeyboardInterrupt are no longer swallowed.
        return False

    return "Metal" in result.stdout
208
+
209
def get_metal_version() -> Optional[str]:
    """Get Metal version string.

    The version is inferred from the SDK version printed by
    ``xcrun --show-sdk-version``; Metal itself is not queried.

    Returns:
        "Metal 3" for SDK 14 and later, "Metal 2" for SDK 10.13 and later,
        "Metal 1" for anything older, or None when ``xcrun`` cannot be run
        or its output does not start with an integer.
    """
    try:
        result = subprocess.run(
            ["xcrun", "--show-sdk-version"],
            capture_output=True,
            text=True,
            check=True,
        )
        parts = result.stdout.strip().split('.')
        major_version = int(parts[0])
    except (OSError, subprocess.SubprocessError, ValueError):
        return None

    # The minor component only matters for the 10.x line; treat a missing or
    # non-numeric minor as 0 rather than failing the whole lookup.
    minor_version = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else 0

    if major_version >= 14:
        return "Metal 3"
    # The Metal 2 feature table in this package lists 10.13 as its minimum,
    # so 10.11 and 10.12 SDKs must not be reported as "Metal 2".
    if (major_version, minor_version) >= (10, 13):
        return "Metal 2"
    return "Metal 1"
229
+
230
def initialize_metal_optimizations() -> Dict[str, Any]:
    """Initialize Metal optimizations for the current system.

    Returns:
        A dict with the detected ``capabilities``, the ``optimization_profile``
        for the detected GPU family, the ``gpu_family`` label and the
        ``metal_version`` string.

    Raises:
        RuntimeError: If Metal support is not detected, or capability
            detection reports the system as unsupported.
    """
    if not check_metal_support():
        raise RuntimeError("Metal is not supported on this system")

    detected = MetalCapabilities.detect_capabilities()
    if not detected["supported"]:
        reason = detected.get('error', 'Unknown error')
        raise RuntimeError(f"Metal not supported: {reason}")

    family = detected["gpu_family"]
    profile = MetalCapabilities.get_optimization_profile(family)

    summary = {"capabilities": detected}
    summary["optimization_profile"] = profile
    summary["gpu_family"] = family
    summary["metal_version"] = detected["version"]
    return summary
249
+
250
+ # Import new unified optimizer (RECOMMENDED)
251
+ try:
252
+ from cortex.metal.optimizer import (
253
+ MetalOptimizer,
254
+ OptimizationConfig,
255
+ Backend,
256
+ InferenceSession
257
+ )
258
+ except ImportError as e:
259
+ # If optimizer fails to import, provide a helpful message
260
+ import warnings
261
+ warnings.warn(
262
+ f"Failed to import MetalOptimizer: {e}. "
263
+ "Some dependencies may be missing.",
264
+ ImportWarning
265
+ )
266
+ MetalOptimizer = None
267
+ OptimizationConfig = None
268
+ Backend = None
269
+ InferenceSession = None
270
+
271
+ # Import existing components
272
+ from cortex.metal.memory_pool import MemoryPool
273
+ from cortex.metal.mps_optimizer import MPSOptimizer
274
+ from cortex.metal.mlx_accelerator import MLXAccelerator
275
+ from cortex.metal.performance_profiler import PerformanceProfiler
@@ -0,0 +1,177 @@
1
+ """GPU validation and capability detection for Metal."""
2
+
3
+ import subprocess
4
+ import platform
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
@dataclass
class GPUInfo:
    """Detected GPU identity plus the capability flags derived from it.

    Every field has a conservative default, so a bare ``GPUInfo()`` describes
    an unrecognised GPU with no optional features enabled.
    """

    # Family label: apple5 (M1), apple6 (M2), apple7 (M3), apple8 (M4)
    gpu_family: str = "unknown"
    # Optional-feature flags; all off until detection turns them on.
    supports_bfloat16: bool = False
    supports_simdgroup_matrix: bool = False
    supports_tile_functions: bool = False
    supports_mpp: bool = False
    # True once an Apple Silicon chip has been recognised.
    is_apple_silicon: bool = False
    # Metal version string associated with this GPU.
    metal_version: str = "3.0"
18
+
19
class GPUValidator:
    """Validates GPU capabilities for Metal optimization.

    Detection shells out to ``system_profiler`` and falls back to ``sysctl``
    when that fails. Results are stored on ``gpu_info`` and
    ``validation_passed`` after ``validate()`` has run.
    """

    # (marker in lower-cased system_profiler output, family label,
    #  supports_bfloat16, supports_tile_functions, metal_version).
    # Checked in order, newest chip first. Markers include the "apple "
    # prefix so that unrelated text in the report (for example an attached
    # monitor whose model name contains "m2" or "m3") cannot be mistaken for
    # the chip name.
    _CHIP_PROFILES = (
        ("apple m4", "apple8", True, True, "3.1"),
        ("apple m3", "apple7", True, True, "3.1"),
        ("apple m2", "apple6", True, False, "3.0"),
        ("apple m1", "apple5", False, False, "3.0"),
    )

    def __init__(self):
        """Initialize GPU validator."""
        self.gpu_info = None
        self.validation_passed = False

    def validate(self) -> bool:
        """
        Validate GPU and detect capabilities.

        Returns:
            True if GPU is validated and ready
        """
        self.gpu_info = self._detect_gpu()
        self.validation_passed = self.gpu_info is not None
        return self.validation_passed

    def _detect_gpu(self) -> "Optional[GPUInfo]":
        """
        Detect GPU model and capabilities.

        Returns:
            GPUInfo object with detected capabilities, or None when not on
            macOS or when neither probe can identify an Apple chip
        """
        if platform.system() != "Darwin":
            return None

        info = GPUInfo()

        try:
            # Use system_profiler to detect GPU
            result = subprocess.run(
                ["system_profiler", "SPDisplaysDataType"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError, ValueError):
            # Timeout, missing tool or undecodable output: try the fallback.
            result = None

        if result is not None and result.returncode == 0:
            self._apply_profiler_output(info, result.stdout.lower())
            return info

        # A non-zero exit status is a failed probe too, so it also takes the
        # fallback path instead of silently reporting "no GPU".
        return self._detect_via_sysctl(info)

    @classmethod
    def _apply_profiler_output(cls, info: "GPUInfo", output: str) -> None:
        """Fill ``info`` in place from lower-cased system_profiler output."""
        if "apple m" not in output and "apple silicon" not in output:
            return

        info.is_apple_silicon = True

        # Detect specific chip
        for marker, family, bfloat16, tile_functions, metal in cls._CHIP_PROFILES:
            if marker in output:
                info.gpu_family = family
                info.supports_bfloat16 = bfloat16
                info.supports_tile_functions = tile_functions
                info.metal_version = metal
                break

        info.supports_simdgroup_matrix = False  # Not in public API
        info.supports_mpp = True

    @staticmethod
    def _detect_via_sysctl(info: "GPUInfo") -> "Optional[GPUInfo]":
        """Fallback probe: identify an Apple chip from the CPU brand string."""
        try:
            result = subprocess.run(
                ["sysctl", "-n", "machdep.cpu.brand_string"],
                capture_output=True,
                text=True,
            )
        except (OSError, subprocess.SubprocessError, ValueError):
            return None

        if "Apple" in result.stdout:
            info.is_apple_silicon = True
            info.gpu_family = "apple5"  # Conservative default
            return info
        return None

    def check_bfloat16_support(self) -> bool:
        """
        Check if current GPU supports bfloat16.

        Runs validate() first when no GPU info has been stored yet.

        Returns:
            True if bfloat16 is supported
        """
        if not self.gpu_info:
            self.validate()

        return self.gpu_info.supports_bfloat16 if self.gpu_info else False

    def get_gpu_family(self) -> str:
        """
        Get GPU family identifier.

        Runs validate() first when no GPU info has been stored yet.

        Returns:
            GPU family string (apple5, apple6, etc.), or "unknown"
        """
        if not self.gpu_info:
            self.validate()

        return self.gpu_info.gpu_family if self.gpu_info else "unknown"

    def get_metal_version(self) -> str:
        """
        Get recommended Metal version for this GPU.

        Runs validate() first when no GPU info has been stored yet.

        Returns:
            Metal version string ("3.0" when no GPU was detected)
        """
        if not self.gpu_info:
            self.validate()

        return self.gpu_info.metal_version if self.gpu_info else "3.0"

    def get_capabilities_summary(self) -> dict:
        """
        Get summary of GPU capabilities.

        Runs validate() first when no GPU info has been stored yet.

        Returns:
            Dictionary with capability flags
        """
        if not self.gpu_info:
            self.validate()

        if self.gpu_info:
            return {
                "gpu_family": self.gpu_info.gpu_family,
                "is_apple_silicon": self.gpu_info.is_apple_silicon,
                "supports_bfloat16": self.gpu_info.supports_bfloat16,
                "metal_version": self.gpu_info.metal_version,
                "validation_passed": self.validation_passed,
            }

        return {
            "gpu_family": "unknown",
            "is_apple_silicon": False,
            "supports_bfloat16": False,
            "metal_version": "3.0",
            "validation_passed": False,
        }
172
+
173
+ # Convenience function for quick GPU validation
174
def validate_gpu() -> bool:
    """Run a one-off GPU validation with a throwaway GPUValidator.

    Returns:
        The result of ``GPUValidator.validate()`` on a fresh instance.
    """
    return GPUValidator().validate()