cortex-llm 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortex/__init__.py CHANGED
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
5
5
  with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
6
6
  """
7
7
 
8
- __version__ = "1.0.2"
8
+ __version__ = "1.0.4"
9
9
  __author__ = "Cortex Development Team"
10
10
  __license__ = "MIT"
11
11
 
cortex/__main__.py CHANGED
@@ -9,11 +9,12 @@ import warnings
9
9
  # This prevents the semaphore leak warning from transformers library
10
10
  os.environ['PYTHONWARNINGS'] = 'ignore::UserWarning:multiprocessing.resource_tracker'
11
11
 
12
- # Silence known MLX deprecation warning surfaced during generation.
13
- warnings.filterwarnings(
14
- "ignore",
15
- message=r"mx\.metal\.device_info is deprecated.*",
16
- )
12
+ # Apply MLX compatibility shims before any MLX/MLX-LM imports.
13
+ try:
14
+ from cortex.metal.mlx_compat import patch_mlx_lm_device_info
15
+ patch_mlx_lm_device_info()
16
+ except Exception:
17
+ pass
17
18
 
18
19
  # Alternative: Monkey-patch the resource tracker before it's used
19
20
  try:
@@ -25,6 +25,8 @@ try:
25
25
  except ImportError:
26
26
  mlx_generate = None
27
27
  mlx_stream_generate = None
28
+ from cortex.metal.mlx_compat import patch_mlx_lm_device_info
29
+ patch_mlx_lm_device_info()
28
30
 
29
31
  from cortex.config import Config
30
32
  from cortex.model_manager import ModelManager, ModelFormat
@@ -724,4 +726,4 @@ class InferenceEngine:
724
726
  pass
725
727
 
726
728
  except Exception as e:
727
- print(f"Warning: GPU warmup failed: {e}")
729
+ print(f"Warning: GPU warmup failed: {e}")
@@ -21,6 +21,8 @@ except ImportError:
21
21
  # Fallback if mlx_lm is not available
22
22
  generate = None
23
23
  stream_generate = None
24
+ from cortex.metal.mlx_compat import patch_mlx_lm_device_info
25
+ patch_mlx_lm_device_info()
24
26
 
25
27
  @dataclass
26
28
  class MLXConfig:
@@ -675,4 +677,4 @@ class MLXAccelerator:
675
677
  }
676
678
 
677
679
  logger.debug(f"Benchmark results: {result}")
678
- return result
680
+ return result
@@ -0,0 +1,105 @@
1
+ """Compatibility helpers for MLX / mlx_lm API changes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ from typing import Optional, List, Any
7
+
8
+
9
+ def _get_device_info(mx) -> dict:
10
+ try:
11
+ return mx.device_info()
12
+ except Exception:
13
+ return {}
14
+
15
def patch_mlx_device_info(mx: Optional[Any] = None) -> None:
    """Redirect deprecated ``mx.metal.device_info`` to ``mx.device_info`` when possible.

    Args:
        mx: The ``mlx.core`` module to patch. When omitted, ``mlx.core`` is
            imported here; if that import fails the function returns quietly.

    The redirect is applied only when both the new top-level ``device_info``
    and the legacy ``metal.device_info`` attribute exist. Calling the function
    again after a successful redirect is a no-op.
    """
    if mx is None:
        try:
            import mlx.core as mx
        except Exception:
            # MLX is optional; without it there is nothing to patch.
            return

    replacement = getattr(mx, "device_info", None)
    metal = getattr(mx, "metal", None)
    if replacement is None or metal is None or not hasattr(metal, "device_info"):
        return

    if metal.device_info is replacement:
        # Already redirected by an earlier call.
        return

    try:
        metal.device_info = replacement  # type: ignore[attr-defined]
    except Exception as exc:
        # Keep this best-effort (never break startup), but leave a trace
        # instead of discarding the failure silently.
        import logging

        logging.getLogger(__name__).debug(
            "Could not redirect mx.metal.device_info: %s", exc
        )
27
+
28
+
29
def _import_submodule(name: str) -> Optional[Any]:
    """Return the real module object for dotted ``name``, or None if it cannot be imported."""
    import importlib

    try:
        # ``import pkg.sub as alias`` binds ``getattr(pkg, "sub")``. When the
        # package re-exports a function with the same name as the submodule
        # (``from .generate import generate``), that yields the function rather
        # than the module. ``import_module`` always returns the module itself.
        return importlib.import_module(name)
    except Exception:
        return None


def _metal_available(mx: Any) -> bool:
    """Report whether Metal is usable, tolerating an absent ``mx.metal`` namespace."""
    is_available = getattr(getattr(mx, "metal", None), "is_available", None)
    return bool(is_available()) if callable(is_available) else False


def _install_wired_limit(mx: Any, tree_reduce: Any, generate_module: Any) -> None:
    """Replace ``generate_module.wired_limit`` with a version built on ``mx.device_info()``."""

    @contextlib.contextmanager
    def wired_limit(model: Any, streams: Optional[List[Any]] = None):
        if not _metal_available(mx):
            # No Metal backend: nothing to configure or restore.
            yield
            return

        model_bytes = tree_reduce(
            lambda acc, x: acc + x.nbytes if isinstance(x, mx.array) else acc, model, 0
        )
        info = _get_device_info(mx)
        max_rec_size = (
            info.get("max_recommended_working_set_size") if isinstance(info, dict) else None
        )

        if max_rec_size and model_bytes > 0.9 * max_rec_size:
            model_mb = model_bytes // 2**20
            max_rec_mb = max_rec_size // 2**20
            print(
                f"[WARNING] Generating with a model that requires {model_mb} MB "
                f"which is close to the maximum recommended size of {max_rec_mb} "
                "MB. This can be slow. See the documentation for possible work-arounds: "
                "https://github.com/ml-explore/mlx-lm/tree/main#large-models"
            )

        # Only touch the wired limit when the device reported a usable size.
        old_limit = mx.set_wired_limit(max_rec_size) if max_rec_size else None

        try:
            yield
        finally:
            # Drain outstanding work before restoring the previous limit.
            if streams is not None:
                for s in streams:
                    mx.synchronize(s)
            else:
                mx.synchronize()
            if old_limit is not None:
                mx.set_wired_limit(old_limit)

    generate_module.wired_limit = wired_limit
    generate_module.__cortex_patched__ = True


def _install_system_fingerprint(mx: Any, server_module: Any) -> None:
    """Replace ``server_module.get_system_fingerprint`` with a version built on ``mx.device_info()``."""

    def get_system_fingerprint():
        gpu_arch = ""
        if _metal_available(mx):
            info = _get_device_info(mx)
            gpu_arch = info.get("architecture", "") if isinstance(info, dict) else ""
        return f"{server_module.__version__}-{mx.__version__}-{server_module.platform.platform()}-{gpu_arch}"

    server_module.get_system_fingerprint = get_system_fingerprint
    server_module.__cortex_patched__ = True


def patch_mlx_lm_device_info() -> None:
    """Patch mlx_lm call sites to use mx.device_info() instead of mx.metal.device_info().

    Does nothing when MLX cannot be imported or does not expose the top-level
    ``device_info`` API. Otherwise it redirects ``mx.metal.device_info`` and
    then, for each of ``mlx_lm.generate`` and ``mlx_lm.server`` that imports
    successfully and is not yet marked ``__cortex_patched__``, installs the
    replacement ``wired_limit`` / ``get_system_fingerprint`` on the module.
    Safe to call repeatedly.
    """
    try:
        import mlx.core as mx
        from mlx.utils import tree_reduce
    except Exception:
        return

    if not hasattr(mx, "device_info"):
        return

    patch_mlx_device_info()

    generate_module = _import_submodule("mlx_lm.generate")
    server_module = _import_submodule("mlx_lm.server")

    if generate_module is not None and not getattr(generate_module, "__cortex_patched__", False):
        _install_wired_limit(mx, tree_reduce, generate_module)

    if server_module is not None and not getattr(server_module, "__cortex_patched__", False):
        _install_system_fingerprint(mx, server_module)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cortex-llm
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: GPU-Accelerated LLM Terminal for Apple Silicon
5
5
  Home-page: https://github.com/faisalmumtaz/Cortex
6
6
  Author: Cortex Development Team
@@ -1,9 +1,9 @@
1
- cortex/__init__.py,sha256=0McchQSofocWIRoBIgEmR2lhMOL4ke2WcR9O70t4V6A,2202
2
- cortex/__main__.py,sha256=pu1ah74AzLDHLjtZdw14OPELb0BVSacdaVcBvhOjto8,2830
1
+ cortex/__init__.py,sha256=6KYzL3KARjSGTCZnrmxFxVlfuMUFtIwCL4cK2ekXOAs,2202
2
+ cortex/__main__.py,sha256=I7Njt7BjGoHtPhftDoA44OyOYbwWNNaPwP_qlJSn0J4,2857
3
3
  cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
4
4
  cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
5
5
  cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
6
- cortex/inference_engine.py,sha256=WUi5YS_sTQFayc-UJbyixHEzZlMN0-ATNdoPNEhHbbo,28543
6
+ cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
7
7
  cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
8
8
  cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
9
9
  cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
@@ -14,7 +14,8 @@ cortex/fine_tuning/wizard.py,sha256=eIRUM3zTqKKATJEbQrBsaOfFfRWfY9BV5FkSAzT82QM,
14
14
  cortex/metal/__init__.py,sha256=Ycs81qVOsaYV4UJocCFGW3rPPBySMPy7eOHKzfc4Q7o,8780
15
15
  cortex/metal/gpu_validator.py,sha256=1YHKJXqicXvTwKIdSj34n1DgKoluy9yho6S1jWt1UAs,5818
16
16
  cortex/metal/memory_pool.py,sha256=g5PFQAiouQe4TyX-SVi-Di1MLysb3YBF77uR4nAEomo,34698
17
- cortex/metal/mlx_accelerator.py,sha256=HknYFHwFOPzAtdrV5OqxBvENX3qwNoiKC6H8lXGmJPg,25637
17
+ cortex/metal/mlx_accelerator.py,sha256=f3tfHAaRQqc5KteXNkf7n610SxOLo2hoGj5_GgqEL2Y,25726
18
+ cortex/metal/mlx_compat.py,sha256=oZ_RNjJzWs6h6Q3mSNK-K2--BhJwKt7d_tYt3uMN1pM,3531
18
19
  cortex/metal/mlx_converter.py,sha256=lMmIh6PqLwraWInZDLJjktBtumLHD9TYRnhiKCpilrc,26722
19
20
  cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ,15307
20
21
  cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
@@ -40,9 +41,9 @@ cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
40
41
  cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
41
42
  cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
42
43
  cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
43
- cortex_llm-1.0.2.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
44
- cortex_llm-1.0.2.dist-info/METADATA,sha256=BvlqwNOXXuUXCiQU2WWQ11rH4drWMxSSzu5PV-9DlFc,10087
45
- cortex_llm-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- cortex_llm-1.0.2.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
47
- cortex_llm-1.0.2.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
48
- cortex_llm-1.0.2.dist-info/RECORD,,
44
+ cortex_llm-1.0.4.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
45
+ cortex_llm-1.0.4.dist-info/METADATA,sha256=rX0lVqvlXVaLNMfn3QWJH2rYSShxAiH7v6d_fWKvkYg,10087
46
+ cortex_llm-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ cortex_llm-1.0.4.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
48
+ cortex_llm-1.0.4.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
49
+ cortex_llm-1.0.4.dist-info/RECORD,,