cortex-llm 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +1 -1
- cortex/inference_engine.py +3 -1
- cortex/metal/mlx_accelerator.py +3 -1
- cortex/metal/mlx_compat.py +90 -0
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/METADATA +1 -1
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/RECORD +10 -9
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/WHEEL +0 -0
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/entry_points.txt +0 -0
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/top_level.txt +0 -0
cortex/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
|
|
|
5
5
|
with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "1.0.2"
|
|
8
|
+
__version__ = "1.0.3"
|
|
9
9
|
__author__ = "Cortex Development Team"
|
|
10
10
|
__license__ = "MIT"
|
|
11
11
|
|
cortex/inference_engine.py
CHANGED
|
@@ -25,6 +25,8 @@ try:
|
|
|
25
25
|
except ImportError:
|
|
26
26
|
mlx_generate = None
|
|
27
27
|
mlx_stream_generate = None
|
|
28
|
+
from cortex.metal.mlx_compat import patch_mlx_lm_device_info
|
|
29
|
+
patch_mlx_lm_device_info()
|
|
28
30
|
|
|
29
31
|
from cortex.config import Config
|
|
30
32
|
from cortex.model_manager import ModelManager, ModelFormat
|
|
@@ -724,4 +726,4 @@ class InferenceEngine:
|
|
|
724
726
|
pass
|
|
725
727
|
|
|
726
728
|
except Exception as e:
|
|
727
|
-
print(f"Warning: GPU warmup failed: {e}")
|
|
729
|
+
print(f"Warning: GPU warmup failed: {e}")
|
cortex/metal/mlx_accelerator.py
CHANGED
|
@@ -21,6 +21,8 @@ except ImportError:
|
|
|
21
21
|
# Fallback if mlx_lm is not available
|
|
22
22
|
generate = None
|
|
23
23
|
stream_generate = None
|
|
24
|
+
from cortex.metal.mlx_compat import patch_mlx_lm_device_info
|
|
25
|
+
patch_mlx_lm_device_info()
|
|
24
26
|
|
|
25
27
|
@dataclass
|
|
26
28
|
class MLXConfig:
|
|
@@ -675,4 +677,4 @@ class MLXAccelerator:
|
|
|
675
677
|
}
|
|
676
678
|
|
|
677
679
|
logger.debug(f"Benchmark results: {result}")
|
|
678
|
-
return result
|
|
680
|
+
return result
|
cortex/metal/mlx_compat.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Compatibility helpers for MLX / mlx_lm API changes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
from typing import Optional, List, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _get_device_info(mx) -> dict:
|
|
10
|
+
try:
|
|
11
|
+
return mx.device_info()
|
|
12
|
+
except Exception:
|
|
13
|
+
return {}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def patch_mlx_lm_device_info() -> None:
|
|
17
|
+
"""Patch mlx_lm call sites to use mx.device_info() instead of mx.metal.device_info()."""
|
|
18
|
+
try:
|
|
19
|
+
import mlx.core as mx
|
|
20
|
+
from mlx.utils import tree_reduce
|
|
21
|
+
except Exception:
|
|
22
|
+
return
|
|
23
|
+
|
|
24
|
+
if not hasattr(mx, "device_info"):
|
|
25
|
+
return
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
import mlx_lm.generate as mlx_generate
|
|
29
|
+
except Exception:
|
|
30
|
+
mlx_generate = None
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
import mlx_lm.server as mlx_server
|
|
34
|
+
except Exception:
|
|
35
|
+
mlx_server = None
|
|
36
|
+
|
|
37
|
+
if mlx_generate is not None and getattr(mlx_generate, "__cortex_patched__", False) is False:
|
|
38
|
+
@contextlib.contextmanager
|
|
39
|
+
def wired_limit(model: Any, streams: Optional[List[Any]] = None):
|
|
40
|
+
if not mx.metal.is_available():
|
|
41
|
+
try:
|
|
42
|
+
yield
|
|
43
|
+
finally:
|
|
44
|
+
pass
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
model_bytes = tree_reduce(
|
|
48
|
+
lambda acc, x: acc + x.nbytes if isinstance(x, mx.array) else acc, model, 0
|
|
49
|
+
)
|
|
50
|
+
info = _get_device_info(mx)
|
|
51
|
+
max_rec_size = info.get("max_recommended_working_set_size")
|
|
52
|
+
|
|
53
|
+
if max_rec_size and model_bytes > 0.9 * max_rec_size:
|
|
54
|
+
model_mb = model_bytes // 2**20
|
|
55
|
+
max_rec_mb = max_rec_size // 2**20
|
|
56
|
+
print(
|
|
57
|
+
f"[WARNING] Generating with a model that requires {model_mb} MB "
|
|
58
|
+
f"which is close to the maximum recommended size of {max_rec_mb} "
|
|
59
|
+
"MB. This can be slow. See the documentation for possible work-arounds: "
|
|
60
|
+
"https://github.com/ml-explore/mlx-lm/tree/main#large-models"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
old_limit = None
|
|
64
|
+
if max_rec_size:
|
|
65
|
+
old_limit = mx.set_wired_limit(max_rec_size)
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
yield
|
|
69
|
+
finally:
|
|
70
|
+
if streams is not None:
|
|
71
|
+
for s in streams:
|
|
72
|
+
mx.synchronize(s)
|
|
73
|
+
else:
|
|
74
|
+
mx.synchronize()
|
|
75
|
+
if old_limit is not None:
|
|
76
|
+
mx.set_wired_limit(old_limit)
|
|
77
|
+
|
|
78
|
+
mlx_generate.wired_limit = wired_limit
|
|
79
|
+
mlx_generate.__cortex_patched__ = True
|
|
80
|
+
|
|
81
|
+
if mlx_server is not None and getattr(mlx_server, "__cortex_patched__", False) is False:
|
|
82
|
+
def get_system_fingerprint():
|
|
83
|
+
gpu_arch = ""
|
|
84
|
+
if mx.metal.is_available():
|
|
85
|
+
info = _get_device_info(mx)
|
|
86
|
+
gpu_arch = info.get("architecture", "") if isinstance(info, dict) else ""
|
|
87
|
+
return f"{mlx_server.__version__}-{mx.__version__}-{mlx_server.platform.platform()}-{gpu_arch}"
|
|
88
|
+
|
|
89
|
+
mlx_server.get_system_fingerprint = get_system_fingerprint
|
|
90
|
+
mlx_server.__cortex_patched__ = True
|
{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.3.dist-info}/RECORD
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
cortex/__init__.py,sha256=
|
|
1
|
+
cortex/__init__.py,sha256=8MdTyD0Zir7OZiah2ITOoqjFz86iZ6-XFjeNe6ESXgo,2202
|
|
2
2
|
cortex/__main__.py,sha256=pu1ah74AzLDHLjtZdw14OPELb0BVSacdaVcBvhOjto8,2830
|
|
3
3
|
cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
|
|
4
4
|
cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
|
|
5
5
|
cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
|
|
6
|
-
cortex/inference_engine.py,sha256=
|
|
6
|
+
cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
|
|
7
7
|
cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
|
|
8
8
|
cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
|
|
9
9
|
cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
|
|
@@ -14,7 +14,8 @@ cortex/fine_tuning/wizard.py,sha256=eIRUM3zTqKKATJEbQrBsaOfFfRWfY9BV5FkSAzT82QM,
|
|
|
14
14
|
cortex/metal/__init__.py,sha256=Ycs81qVOsaYV4UJocCFGW3rPPBySMPy7eOHKzfc4Q7o,8780
|
|
15
15
|
cortex/metal/gpu_validator.py,sha256=1YHKJXqicXvTwKIdSj34n1DgKoluy9yho6S1jWt1UAs,5818
|
|
16
16
|
cortex/metal/memory_pool.py,sha256=g5PFQAiouQe4TyX-SVi-Di1MLysb3YBF77uR4nAEomo,34698
|
|
17
|
-
cortex/metal/mlx_accelerator.py,sha256=
|
|
17
|
+
cortex/metal/mlx_accelerator.py,sha256=f3tfHAaRQqc5KteXNkf7n610SxOLo2hoGj5_GgqEL2Y,25726
|
|
18
|
+
cortex/metal/mlx_compat.py,sha256=NLcxjF7mXDF1SxjiE2QAk_zkmhYSNybaEvBAwFJAdAg,3070
|
|
18
19
|
cortex/metal/mlx_converter.py,sha256=lMmIh6PqLwraWInZDLJjktBtumLHD9TYRnhiKCpilrc,26722
|
|
19
20
|
cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ,15307
|
|
20
21
|
cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
|
|
@@ -40,9 +41,9 @@ cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
|
|
|
40
41
|
cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
|
|
41
42
|
cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
|
|
42
43
|
cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
|
|
43
|
-
cortex_llm-1.0.2.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
|
|
44
|
-
cortex_llm-1.0.
|
|
45
|
-
cortex_llm-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
-
cortex_llm-1.0.2.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
|
|
47
|
-
cortex_llm-1.0.2.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
|
|
48
|
-
cortex_llm-1.0.2.dist-info/RECORD,,
|
|
44
|
+
cortex_llm-1.0.3.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
|
|
45
|
+
cortex_llm-1.0.3.dist-info/METADATA,sha256=q8VVvfd9ZVe1iDnpNltPQ2PU0_BInjvw4cwkcwQqJdY,10087
|
|
46
|
+
cortex_llm-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
47
|
+
cortex_llm-1.0.3.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
|
|
48
|
+
cortex_llm-1.0.3.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
|
|
49
|
+
cortex_llm-1.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|