cortex_llm-1.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (48)
  1. cortex/__init__.py +73 -0
  2. cortex/__main__.py +83 -0
  3. cortex/config.py +329 -0
  4. cortex/conversation_manager.py +468 -0
  5. cortex/fine_tuning/__init__.py +8 -0
  6. cortex/fine_tuning/dataset.py +332 -0
  7. cortex/fine_tuning/mlx_lora_trainer.py +502 -0
  8. cortex/fine_tuning/trainer.py +957 -0
  9. cortex/fine_tuning/wizard.py +707 -0
  10. cortex/gpu_validator.py +467 -0
  11. cortex/inference_engine.py +727 -0
  12. cortex/metal/__init__.py +275 -0
  13. cortex/metal/gpu_validator.py +177 -0
  14. cortex/metal/memory_pool.py +886 -0
  15. cortex/metal/mlx_accelerator.py +678 -0
  16. cortex/metal/mlx_converter.py +638 -0
  17. cortex/metal/mps_optimizer.py +417 -0
  18. cortex/metal/optimizer.py +665 -0
  19. cortex/metal/performance_profiler.py +364 -0
  20. cortex/model_downloader.py +130 -0
  21. cortex/model_manager.py +2187 -0
  22. cortex/quantization/__init__.py +5 -0
  23. cortex/quantization/dynamic_quantizer.py +736 -0
  24. cortex/template_registry/__init__.py +15 -0
  25. cortex/template_registry/auto_detector.py +144 -0
  26. cortex/template_registry/config_manager.py +234 -0
  27. cortex/template_registry/interactive.py +260 -0
  28. cortex/template_registry/registry.py +347 -0
  29. cortex/template_registry/template_profiles/__init__.py +5 -0
  30. cortex/template_registry/template_profiles/base.py +142 -0
  31. cortex/template_registry/template_profiles/complex/__init__.py +5 -0
  32. cortex/template_registry/template_profiles/complex/reasoning.py +263 -0
  33. cortex/template_registry/template_profiles/standard/__init__.py +9 -0
  34. cortex/template_registry/template_profiles/standard/alpaca.py +73 -0
  35. cortex/template_registry/template_profiles/standard/chatml.py +82 -0
  36. cortex/template_registry/template_profiles/standard/gemma.py +103 -0
  37. cortex/template_registry/template_profiles/standard/llama.py +87 -0
  38. cortex/template_registry/template_profiles/standard/simple.py +65 -0
  39. cortex/ui/__init__.py +120 -0
  40. cortex/ui/cli.py +1685 -0
  41. cortex/ui/markdown_render.py +185 -0
  42. cortex/ui/terminal_app.py +534 -0
  43. cortex_llm-1.0.0.dist-info/METADATA +275 -0
  44. cortex_llm-1.0.0.dist-info/RECORD +48 -0
  45. cortex_llm-1.0.0.dist-info/WHEEL +5 -0
  46. cortex_llm-1.0.0.dist-info/entry_points.txt +2 -0
  47. cortex_llm-1.0.0.dist-info/licenses/LICENSE +21 -0
  48. cortex_llm-1.0.0.dist-info/top_level.txt +1 -0
cortex/__init__.py ADDED
@@ -0,0 +1,73 @@
+ """
+ Cortex - GPU-Accelerated LLM Terminal for Apple Silicon
+
+ A high-performance terminal interface for running Hugging Face LLMs locally
+ with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
+ """
+
+ __version__ = "1.0.0"
+ __author__ = "Cortex Development Team"
+ __license__ = "MIT"
+
+ from typing import Optional, Dict, Any
+ import platform
+ import sys
+
+ MINIMUM_PYTHON_VERSION = (3, 11)
+ SUPPORTED_PLATFORM = "darwin"
+
+ def verify_system_requirements() -> Dict[str, Any]:
+     """Verify that the system meets Cortex requirements."""
+     requirements = {
+         "python_version": sys.version_info >= MINIMUM_PYTHON_VERSION,
+         "platform": platform.system().lower() == SUPPORTED_PLATFORM,
+         "architecture": platform.machine() == "arm64",
+         "errors": []
+     }
+
+     if not requirements["python_version"]:
+         requirements["errors"].append(
+             f"Python {MINIMUM_PYTHON_VERSION[0]}.{MINIMUM_PYTHON_VERSION[1]}+ required, "
+             f"found {sys.version_info.major}.{sys.version_info.minor}"
+         )
+
+     if not requirements["platform"]:
+         requirements["errors"].append(
+             f"macOS required, found {platform.system()}"
+         )
+
+     if not requirements["architecture"]:
+         requirements["errors"].append(
+             f"ARM64 architecture required, found {platform.machine()}"
+         )
+
+     requirements["valid"] = len(requirements["errors"]) == 0
+     return requirements
+
+ def initialize_cortex() -> bool:
+     """Initialize Cortex and verify system compatibility."""
+     requirements = verify_system_requirements()
+
+     if not requirements["valid"]:
+         for error in requirements["errors"]:
+             print(f"❌ {error}", file=sys.stderr)
+         return False
+
+     return True
+
+ from cortex.config import Config
+ from cortex.gpu_validator import GPUValidator
+ from cortex.model_manager import ModelManager
+ from cortex.inference_engine import InferenceEngine
+ from cortex.conversation_manager import ConversationManager
+
+ __all__ = [
+     "__version__",
+     "Config",
+     "GPUValidator",
+     "ModelManager",
+     "InferenceEngine",
+     "ConversationManager",
+     "initialize_cortex",
+     "verify_system_requirements"
+ ]
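
The initializer exposes `verify_system_requirements()` and `initialize_cortex()` alongside the main classes, and the submodule imports at the bottom of the file run on `import cortex`, so importing the package already requires pydantic and PyYAML to be installed. A minimal pre-flight sketch using only the helpers defined above (illustrative usage, not part of the package):

```python
import sys

import cortex

# The report dict mirrors what verify_system_requirements() builds above:
# boolean checks plus an "errors" list and an aggregate "valid" flag.
report = cortex.verify_system_requirements()
if not report["valid"]:
    for err in report["errors"]:
        print(f"unsupported system: {err}", file=sys.stderr)
    sys.exit(1)

print(f"Cortex {cortex.__version__} ready")
```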
cortex/__main__.py ADDED
@@ -0,0 +1,83 @@
+ """Main entry point for Cortex."""
+
+ import sys
+ from pathlib import Path
+ import os
+ import warnings
+
+ # Disable multiprocessing resource tracking before any imports that might use it
+ # This prevents the semaphore leak warning from transformers library
+ os.environ['PYTHONWARNINGS'] = 'ignore::UserWarning:multiprocessing.resource_tracker'
+
+ # Alternative: Monkey-patch the resource tracker before it's used
+ try:
+     from multiprocessing import resource_tracker
+     def dummy_register(*args, **kwargs):
+         pass
+     def dummy_unregister(*args, **kwargs):
+         pass
+     resource_tracker.register = dummy_register
+     resource_tracker.unregister = dummy_unregister
+ except ImportError:
+     pass
+
+ from cortex.config import Config
+ from cortex.gpu_validator import GPUValidator
+ from cortex.model_manager import ModelManager
+ from cortex.inference_engine import InferenceEngine
+ from cortex.conversation_manager import ConversationManager
+ from cortex.ui.cli import CortexCLI
+
+
+ def main():
+     """Main entry point."""
+
+     inference_engine = None
+     try:
+         # Load configuration
+         config = Config()
+
+         # Initialize GPU validator
+         gpu_validator = GPUValidator()
+
+         # Validate GPU
+         is_valid, gpu_info, errors = gpu_validator.validate()
+         if not is_valid:
+             print("Error: GPU validation failed. Cortex requires Apple Silicon with Metal support.", file=sys.stderr)
+             for error in errors:
+                 print(f" - {error}", file=sys.stderr)
+             sys.exit(1)
+
+         # Initialize components
+         model_manager = ModelManager(config, gpu_validator)
+         inference_engine = InferenceEngine(config, model_manager)
+         conversation_manager = ConversationManager(config)
+
+         # Create and run the CLI
+         cli = CortexCLI(
+             config=config,
+             gpu_validator=gpu_validator,
+             model_manager=model_manager,
+             inference_engine=inference_engine,
+             conversation_manager=conversation_manager
+         )
+
+         cli.run()
+     finally:
+         # Clean up resources
+         if inference_engine is not None and hasattr(inference_engine, 'memory_pool') and inference_engine.memory_pool:
+             inference_engine.memory_pool.cleanup()
+
+         # Force PyTorch cleanup
+         try:
+             import torch
+             if torch.backends.mps.is_available():
+                 torch.mps.synchronize()
+                 if hasattr(torch.mps, 'empty_cache'):
+                     torch.mps.empty_cache()
+         except Exception:
+             pass  # Ignore cleanup errors
+
+
+ if __name__ == "__main__":
+     main()
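
`main()` builds each component in sequence and hands them all to `CortexCLI`; note that the `resource_tracker` monkey-patch disables semaphore tracking process-wide, so genuine leaks also go unreported, and that the `finally` block releases GPU memory even on abnormal exit. For embedding the same pipeline without the terminal UI, here is a sketch based only on the constructor signatures and the `(is_valid, gpu_info, errors)` tuple visible in this hunk; the generation API on the engine lives in cortex/inference_engine.py and is not shown here:

```python
import sys

from cortex.config import Config
from cortex.gpu_validator import GPUValidator
from cortex.model_manager import ModelManager
from cortex.inference_engine import InferenceEngine

config = Config()
validator = GPUValidator()

# validate() returns (is_valid, gpu_info, errors), exactly as unpacked in main().
is_valid, gpu_info, errors = validator.validate()
if not is_valid:
    sys.exit(f"GPU validation failed: {errors}")

# Same wiring order as main(): manager needs the validator, engine needs the manager.
manager = ModelManager(config, validator)
engine = InferenceEngine(config, manager)
```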
cortex/config.py ADDED
@@ -0,0 +1,329 @@
+ """Configuration management for Cortex."""
+
+ import os
+ import sys
+ from pathlib import Path
+ from typing import Dict, Any, Optional, List, Literal
+ from dataclasses import dataclass, field
+ import yaml
+ from pydantic import BaseModel, Field, field_validator
+
+ class GPUConfig(BaseModel):
+     """GPU-specific configuration for Apple Silicon."""
+     compute_backend: Literal["metal"] = "metal"
+     force_gpu: Literal[True] = True
+     metal_performance_shaders: bool = True
+     mlx_backend: bool = True
+     gpu_memory_fraction: float = Field(default=0.85, ge=0.1, le=1.0)
+     gpu_cores: int = Field(default=16, ge=1, le=128)
+     metal_api_version: int = Field(default=3, ge=3)
+     shader_cache: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "metal_shaders")
+     compile_shaders_on_start: bool = True  # Fixed and enabled!
+     gpu_optimization_level: str = Field(default="maximum")
+
+     @field_validator("compute_backend")
+     def validate_backend(cls, v):
+         if v != "metal":
+             raise ValueError("Only 'metal' backend is supported for Apple Silicon GPU")
+         return v
+
+     @field_validator("gpu_cores")
+     def validate_gpu_cores(cls, v):
+         if v < 1 or v > 128:
+             raise ValueError("GPU cores must be between 1 and 128")
+         return v
+
+ class MemoryConfig(BaseModel):
+     """Memory management configuration."""
+     unified_memory: Literal[True] = True
+     max_gpu_memory: str = Field(default="20GB")
+     cpu_offload: Literal[False] = False
+     memory_pool_size: str = Field(default="20GB")
+     kv_cache_size: str = Field(default="2GB")
+     activation_memory: str = Field(default="2GB")
+
+     @field_validator("cpu_offload")
+     def validate_no_cpu_offload(cls, v):
+         if v:
+             raise ValueError("CPU offloading is not allowed - GPU only execution")
+         return v
+
+     def parse_memory_size(self, size_str: str) -> int:
+         """Convert memory size string to bytes."""
+         size_str = size_str.upper().strip()
+         if size_str.endswith("GB"):
+             return int(size_str[:-2]) * 1024 * 1024 * 1024
+         elif size_str.endswith("MB"):
+             return int(size_str[:-2]) * 1024 * 1024
+         else:
+             return int(size_str)
+
+ class PerformanceConfig(BaseModel):
+     """Performance settings."""
+     batch_size: int = Field(default=8, ge=1, le=32)
+     max_batch_size: int = Field(default=16, ge=1, le=64)
+     use_flash_attention: bool = True
+     use_fused_ops: bool = True
+     num_threads: int = Field(default=1, ge=1, le=4)
+     context_length: int = Field(default=32768, ge=512)
+     sliding_window_size: int = Field(default=4096, ge=512)
+
+ class InferenceConfig(BaseModel):
+     """Inference settings."""
+     temperature: float = Field(default=0.7, ge=0.0, le=2.0)
+     top_p: float = Field(default=0.95, ge=0.0, le=1.0)
+     top_k: int = Field(default=40, ge=0)
+     repetition_penalty: float = Field(default=1.1, ge=0.0, le=2.0)
+     max_tokens: int = Field(default=2048, ge=1)
+     stream_output: bool = True
+     seed: int = Field(default=-1)
+
+ class ModelConfig(BaseModel):
+     """Model configuration."""
+     model_path: Path = Field(default_factory=lambda: Path.home() / "models")
+     default_model: str = Field(default="")
+     last_used_model: str = Field(default="")  # Track the last used model
+     model_cache_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "models")
+     preload_models: List[str] = Field(default_factory=list)
+     max_loaded_models: int = Field(default=3, ge=1, le=5)
+     lazy_load: bool = False
+     verify_gpu_compatibility: bool = True
+     default_quantization: str = Field(default="Q4_K_M")
+     supported_quantizations: List[str] = Field(
+         default_factory=lambda: ["Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0"]
+     )
+     auto_quantize: bool = True
+     quantization_cache: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "quantized_models")
+
+ class UIConfig(BaseModel):
+     """UI configuration."""
+     ui_theme: str = Field(default="default")
+     syntax_highlighting: bool = True
+     markdown_rendering: bool = True
+     show_performance_metrics: bool = True
+     show_gpu_utilization: bool = True
+     auto_scroll: bool = True
+     copy_on_select: bool = True
+     mouse_support: bool = True
+
+ class LoggingConfig(BaseModel):
+     """Logging configuration."""
+     log_level: str = Field(default="INFO")
+     log_file: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "cortex.log")
+     log_rotation: str = Field(default="daily")
+     max_log_size: str = Field(default="100MB")
+     performance_logging: bool = True
+     gpu_metrics_interval: int = Field(default=1000, ge=100)
+
+ class ConversationConfig(BaseModel):
+     """Conversation settings."""
+     auto_save: bool = True
+     save_format: str = Field(default="json")
+     save_directory: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "conversations")
+     max_conversation_history: int = Field(default=100, ge=1)
+     enable_branching: bool = True
+
+ class SystemConfig(BaseModel):
+     """System settings."""
+     startup_checks: List[str] = Field(
+         default_factory=lambda: [
+             "verify_metal_support",
+             "check_gpu_memory",
+             "validate_models",
+             "compile_shaders"
+         ]
+     )
+     shutdown_timeout: int = Field(default=5, ge=1)
+     crash_recovery: bool = True
+     auto_update_check: bool = False
+
+ class DeveloperConfig(BaseModel):
+     """Developer settings."""
+     debug_mode: bool = False
+     profile_inference: bool = False
+     metal_capture: bool = False
+     verbose_gpu_logs: bool = False
+
+ class PathsConfig(BaseModel):
+     """Path configuration."""
+     claude_md_path: Path = Field(default_factory=lambda: Path("./CLAUDE.md"))
+     templates_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "templates")
+     plugins_dir: Path = Field(default_factory=lambda: Path.home() / ".cortex" / "plugins")
+
+ class Config:
+     """Main configuration class for Cortex."""
+
+     def __init__(self, config_path: Optional[Path] = None):
+         """Initialize configuration."""
+         self.config_path = config_path or Path("config.yaml")
+         self._raw_config: Dict[str, Any] = {}
+
+         self.gpu: GPUConfig
+         self.memory: MemoryConfig
+         self.performance: PerformanceConfig
+         self.inference: InferenceConfig
+         self.model: ModelConfig
+         self.ui: UIConfig
+         self.logging: LoggingConfig
+         self.conversation: ConversationConfig
+         self.system: SystemConfig
+         self.developer: DeveloperConfig
+         self.paths: PathsConfig
+
+         self.load()
+
+     def load(self) -> None:
+         """Load configuration from YAML file."""
+         if not self.config_path.exists():
+             self._use_defaults()
+             return
+
+         try:
+             with open(self.config_path, 'r') as f:
+                 self._raw_config = yaml.safe_load(f) or {}
+         except Exception as e:
+             print(f"Warning: Failed to load config from {self.config_path}: {e}")
+             self._use_defaults()
+             return
+
+         self._parse_config()
+
+     def _use_defaults(self) -> None:
+         """Use default configuration values."""
+         self.gpu = GPUConfig()
+         self.memory = MemoryConfig()
+         self.performance = PerformanceConfig()
+         self.inference = InferenceConfig()
+         self.model = ModelConfig()
+         self.ui = UIConfig()
+         self.logging = LoggingConfig()
+         self.conversation = ConversationConfig()
+         self.system = SystemConfig()
+         self.developer = DeveloperConfig()
+         self.paths = PathsConfig()
+
+     def _parse_config(self) -> None:
+         """Parse configuration from raw dictionary."""
+         try:
+             self.gpu = GPUConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["compute_backend", "force_gpu", "metal_performance_shaders",
+                          "mlx_backend", "gpu_memory_fraction", "gpu_cores",
+                          "metal_api_version", "shader_cache", "compile_shaders_on_start",
+                          "gpu_optimization_level"]
+             }))
+
+             self.memory = MemoryConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["unified_memory", "max_gpu_memory", "cpu_offload",
+                          "memory_pool_size", "kv_cache_size", "activation_memory"]
+             }))
+
+             self.performance = PerformanceConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["batch_size", "max_batch_size", "use_flash_attention",
+                          "use_fused_ops", "num_threads", "context_length",
+                          "sliding_window_size"]
+             }))
+
+             self.inference = InferenceConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["temperature", "top_p", "top_k", "repetition_penalty",
+                          "max_tokens", "stream_output", "seed"]
+             }))
+
+             self.model = ModelConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["model_path", "default_model", "last_used_model", "model_cache_dir",
+                          "preload_models", "max_loaded_models", "lazy_load",
+                          "verify_gpu_compatibility", "default_quantization",
+                          "supported_quantizations", "auto_quantize", "quantization_cache"]
+             }))
+
+             self.ui = UIConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["ui_theme", "syntax_highlighting", "markdown_rendering",
+                          "show_performance_metrics", "show_gpu_utilization",
+                          "auto_scroll", "copy_on_select", "mouse_support"]
+             }))
+
+             self.logging = LoggingConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["log_level", "log_file", "log_rotation", "max_log_size",
+                          "performance_logging", "gpu_metrics_interval"]
+             }))
+
+             self.conversation = ConversationConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["auto_save", "save_format", "save_directory",
+                          "max_conversation_history", "enable_branching"]
+             }))
+
+             self.system = SystemConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["startup_checks", "shutdown_timeout", "crash_recovery",
+                          "auto_update_check"]
+             }))
+
+             self.developer = DeveloperConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["debug_mode", "profile_inference", "metal_capture",
+                          "verbose_gpu_logs"]
+             }))
+
+             self.paths = PathsConfig(**self._get_section({
+                 k: v for k, v in self._raw_config.items()
+                 if k in ["claude_md_path", "templates_dir", "plugins_dir"]
+             }))
+
+         except Exception as e:
+             print(f"Error parsing configuration: {e}")
+             self._use_defaults()
+
+     def _get_section(self, section_dict: Dict[str, Any]) -> Dict[str, Any]:
+         """Get a configuration section."""
+         return {k: v for k, v in section_dict.items() if v is not None}
+
+     def validate_gpu_requirements(self) -> bool:
+         """Validate that GPU requirements are met."""
+         if self.gpu.compute_backend != "metal":
+             print("❌ Only Metal backend is supported")
+             return False
+
+         if not self.gpu.force_gpu:
+             print("❌ GPU execution is mandatory")
+             return False
+
+         if self.memory.cpu_offload:
+             print("❌ CPU offloading is not allowed")
+             return False
+
+         return True
+
+     def save(self, path: Optional[Path] = None) -> None:
+         """Save configuration to YAML file."""
+         save_path = path or self.config_path
+
+         # Convert Path objects to strings for YAML serialization
+         config_dict = {}
+         for section in [self.gpu, self.memory, self.performance, self.inference,
+                         self.model, self.ui, self.logging, self.conversation,
+                         self.system, self.developer, self.paths]:
+             section_dict = section.model_dump()
+             # Convert Path objects to strings
+             for key, value in section_dict.items():
+                 if isinstance(value, Path):
+                     section_dict[key] = str(value)
+             config_dict.update(section_dict)
+
+         with open(save_path, 'w') as f:
+             yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
+
+     def update_last_used_model(self, model_name: str) -> None:
+         """Update the last used model and save to config file."""
+         self.model.last_used_model = model_name
+         self.save()
+
+     def __repr__(self) -> str:
+         """String representation."""
+         return f"Config(gpu={self.gpu.compute_backend}, memory={self.memory.max_gpu_memory})"