cortex-llm 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortex/__init__.py CHANGED
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
5
5
  with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
6
6
  """
7
7
 
8
- __version__ = "1.0.2"
8
+ __version__ = "1.0.3"
9
9
  __author__ = "Cortex Development Team"
10
10
  __license__ = "MIT"
11
11
 
@@ -25,6 +25,8 @@ try:
25
25
  except ImportError:
26
26
  mlx_generate = None
27
27
  mlx_stream_generate = None
28
+ from cortex.metal.mlx_compat import patch_mlx_lm_device_info
29
+ patch_mlx_lm_device_info()
28
30
 
29
31
  from cortex.config import Config
30
32
  from cortex.model_manager import ModelManager, ModelFormat
@@ -724,4 +726,4 @@ class InferenceEngine:
724
726
  pass
725
727
 
726
728
  except Exception as e:
727
- print(f"Warning: GPU warmup failed: {e}")
729
+ print(f"Warning: GPU warmup failed: {e}")
@@ -21,6 +21,8 @@ except ImportError:
21
21
  # Fallback if mlx_lm is not available
22
22
  generate = None
23
23
  stream_generate = None
24
+ from cortex.metal.mlx_compat import patch_mlx_lm_device_info
25
+ patch_mlx_lm_device_info()
24
26
 
25
27
  @dataclass
26
28
  class MLXConfig:
@@ -675,4 +677,4 @@ class MLXAccelerator:
675
677
  }
676
678
 
677
679
  logger.debug(f"Benchmark results: {result}")
678
- return result
680
+ return result
@@ -0,0 +1,90 @@
1
+ """Compatibility helpers for MLX / mlx_lm API changes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ from typing import Optional, List, Any
7
+
8
+
9
+ def _get_device_info(mx) -> dict:
10
+ try:
11
+ return mx.device_info()
12
+ except Exception:
13
+ return {}
14
+
15
+
16
def patch_mlx_lm_device_info() -> None:
    """Patch mlx_lm call sites to use mx.device_info() instead of mx.metal.device_info().

    Two module-level functions are replaced, each at most once (guarded by a
    ``__cortex_patched__`` flag stored on the patched module):

    * ``mlx_lm.generate.wired_limit`` -- context manager that raises the
      wired-memory limit for the duration of a generation call.
    * ``mlx_lm.server.get_system_fingerprint`` -- builds the fingerprint
      string from library versions, platform and GPU architecture.

    The function is a silent no-op when ``mlx`` cannot be imported or when
    ``mlx.core`` has no ``device_info`` attribute. A submodule of ``mlx_lm``
    that fails to import is skipped individually.
    """
    # Local import keeps this block self-contained; importlib is stdlib.
    import importlib

    try:
        import mlx.core as mx
        from mlx.utils import tree_reduce
    except Exception:
        # MLX is optional; any import-time failure means there is nothing
        # to patch.
        return

    if not hasattr(mx, "device_info"):
        return

    def _load_submodule(name: str):
        """Return the real submodule object for *name*, or None on failure."""
        # ``import pkg.sub as alias`` binds ``getattr(pkg, "sub")``.  When the
        # package re-exports an attribute with the same name as the submodule
        # (mlx_lm exposes a ``generate`` function), that form yields the
        # attribute rather than the module and the patch below would land on
        # the wrong object.  import_module returns the sys.modules entry.
        try:
            return importlib.import_module(name)
        except Exception:
            return None

    mlx_generate = _load_submodule("mlx_lm.generate")
    mlx_server = _load_submodule("mlx_lm.server")

    if mlx_generate is not None and getattr(mlx_generate, "__cortex_patched__", False) is False:

        @contextlib.contextmanager
        def wired_limit(model: Any, streams: Optional[List[Any]] = None):
            """Temporarily raise the wired limit to the recommended working set.

            Args:
                model: Parameter tree whose ``mx.array`` leaves are summed to
                    estimate the model size in bytes.
                streams: Streams to synchronize on exit; when None a single
                    argument-less ``mx.synchronize()`` is issued instead.
            """
            if not mx.metal.is_available():
                # No Metal backend: nothing to configure or restore.
                yield
                return

            model_bytes = tree_reduce(
                lambda acc, x: acc + x.nbytes if isinstance(x, mx.array) else acc, model, 0
            )
            info = _get_device_info(mx)
            # Guard the lookup so a non-dict result is treated as "unknown".
            max_rec_size = (
                info.get("max_recommended_working_set_size") if isinstance(info, dict) else None
            )

            if max_rec_size and model_bytes > 0.9 * max_rec_size:
                model_mb = model_bytes // 2**20
                max_rec_mb = max_rec_size // 2**20
                print(
                    f"[WARNING] Generating with a model that requires {model_mb} MB "
                    f"which is close to the maximum recommended size of {max_rec_mb} "
                    "MB. This can be slow. See the documentation for possible work-arounds: "
                    "https://github.com/ml-explore/mlx-lm/tree/main#large-models"
                )

            # Only touch the limit when the device reported a usable size.
            old_limit = None
            if max_rec_size:
                old_limit = mx.set_wired_limit(max_rec_size)

            try:
                yield
            finally:
                # Synchronize before restoring the previous limit.
                if streams is not None:
                    for s in streams:
                        mx.synchronize(s)
                else:
                    mx.synchronize()
                if old_limit is not None:
                    mx.set_wired_limit(old_limit)

        mlx_generate.wired_limit = wired_limit
        mlx_generate.__cortex_patched__ = True

    if mlx_server is not None and getattr(mlx_server, "__cortex_patched__", False) is False:

        def get_system_fingerprint():
            """Return ``<mlx_lm version>-<mlx version>-<platform>-<gpu arch>``."""
            gpu_arch = ""
            if mx.metal.is_available():
                info = _get_device_info(mx)
                gpu_arch = info.get("architecture", "") if isinstance(info, dict) else ""
            return f"{mlx_server.__version__}-{mx.__version__}-{mlx_server.platform.platform()}-{gpu_arch}"

        mlx_server.get_system_fingerprint = get_system_fingerprint
        mlx_server.__cortex_patched__ = True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cortex-llm
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: GPU-Accelerated LLM Terminal for Apple Silicon
5
5
  Home-page: https://github.com/faisalmumtaz/Cortex
6
6
  Author: Cortex Development Team
@@ -1,9 +1,9 @@
1
- cortex/__init__.py,sha256=0McchQSofocWIRoBIgEmR2lhMOL4ke2WcR9O70t4V6A,2202
1
+ cortex/__init__.py,sha256=8MdTyD0Zir7OZiah2ITOoqjFz86iZ6-XFjeNe6ESXgo,2202
2
2
  cortex/__main__.py,sha256=pu1ah74AzLDHLjtZdw14OPELb0BVSacdaVcBvhOjto8,2830
3
3
  cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
4
4
  cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
5
5
  cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
6
- cortex/inference_engine.py,sha256=WUi5YS_sTQFayc-UJbyixHEzZlMN0-ATNdoPNEhHbbo,28543
6
+ cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
7
7
  cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
8
8
  cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
9
9
  cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
@@ -14,7 +14,8 @@ cortex/fine_tuning/wizard.py,sha256=eIRUM3zTqKKATJEbQrBsaOfFfRWfY9BV5FkSAzT82QM,
14
14
  cortex/metal/__init__.py,sha256=Ycs81qVOsaYV4UJocCFGW3rPPBySMPy7eOHKzfc4Q7o,8780
15
15
  cortex/metal/gpu_validator.py,sha256=1YHKJXqicXvTwKIdSj34n1DgKoluy9yho6S1jWt1UAs,5818
16
16
  cortex/metal/memory_pool.py,sha256=g5PFQAiouQe4TyX-SVi-Di1MLysb3YBF77uR4nAEomo,34698
17
- cortex/metal/mlx_accelerator.py,sha256=HknYFHwFOPzAtdrV5OqxBvENX3qwNoiKC6H8lXGmJPg,25637
17
+ cortex/metal/mlx_accelerator.py,sha256=f3tfHAaRQqc5KteXNkf7n610SxOLo2hoGj5_GgqEL2Y,25726
18
+ cortex/metal/mlx_compat.py,sha256=NLcxjF7mXDF1SxjiE2QAk_zkmhYSNybaEvBAwFJAdAg,3070
18
19
  cortex/metal/mlx_converter.py,sha256=lMmIh6PqLwraWInZDLJjktBtumLHD9TYRnhiKCpilrc,26722
19
20
  cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ,15307
20
21
  cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
@@ -40,9 +41,9 @@ cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
40
41
  cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
41
42
  cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
42
43
  cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
43
- cortex_llm-1.0.2.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
44
- cortex_llm-1.0.2.dist-info/METADATA,sha256=BvlqwNOXXuUXCiQU2WWQ11rH4drWMxSSzu5PV-9DlFc,10087
45
- cortex_llm-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- cortex_llm-1.0.2.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
47
- cortex_llm-1.0.2.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
48
- cortex_llm-1.0.2.dist-info/RECORD,,
44
+ cortex_llm-1.0.3.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
45
+ cortex_llm-1.0.3.dist-info/METADATA,sha256=q8VVvfd9ZVe1iDnpNltPQ2PU0_BInjvw4cwkcwQqJdY,10087
46
+ cortex_llm-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ cortex_llm-1.0.3.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
48
+ cortex_llm-1.0.3.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
49
+ cortex_llm-1.0.3.dist-info/RECORD,,