PyPI - cortex-llm - Versions diffs - 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

cortex-llm 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

cortex/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ A high-performance terminal interface for running Hugging Face LLMs locally
 with exclusive GPU acceleration via Metal Performance Shaders (MPS) and MLX.
 """
-__version__ = "1.0.2"
+__version__ = "1.0.4"
 __author__ = "Cortex Development Team"
 __license__ = "MIT"

cortex/__main__.py CHANGED Viewed

@@ -9,11 +9,12 @@ import warnings
 # This prevents the semaphore leak warning from transformers library
 os.environ['PYTHONWARNINGS'] = 'ignore::UserWarning:multiprocessing.resource_tracker'
-# Silence known MLX deprecation warning surfaced during generation.
-warnings.filterwarnings(
-    "ignore",
-    message=r"mx\.metal\.device_info is deprecated.*",
-)
+# Apply MLX compatibility shims before any MLX/MLX-LM imports.
+try:
+    from cortex.metal.mlx_compat import patch_mlx_lm_device_info
+    patch_mlx_lm_device_info()
+except Exception:
+    pass
 # Alternative: Monkey-patch the resource tracker before it's used
 try:

cortex/inference_engine.py CHANGED Viewed

@@ -25,6 +25,8 @@ try:
 except ImportError:
     mlx_generate = None
     mlx_stream_generate = None
+from cortex.metal.mlx_compat import patch_mlx_lm_device_info
+patch_mlx_lm_device_info()
 from cortex.config import Config
 from cortex.model_manager import ModelManager, ModelFormat
@@ -724,4 +726,4 @@ class InferenceEngine:
                 pass
         except Exception as e:
-            print(f"Warning: GPU warmup failed: {e}")
+            print(f"Warning: GPU warmup failed: {e}")

cortex/metal/mlx_accelerator.py CHANGED Viewed

@@ -21,6 +21,8 @@ except ImportError:
     # Fallback if mlx_lm is not available
     generate = None
     stream_generate = None
+from cortex.metal.mlx_compat import patch_mlx_lm_device_info
+patch_mlx_lm_device_info()
 @dataclass
 class MLXConfig:
@@ -675,4 +677,4 @@ class MLXAccelerator:
         }
         logger.debug(f"Benchmark results: {result}")
-        return result
+        return result

cortex/metal/mlx_compat.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""Compatibility helpers for MLX / mlx_lm API changes."""
+from __future__ import annotations
+import contextlib
+from typing import Optional, List, Any
+def _get_device_info(mx) -> dict:
+    try:
+        return mx.device_info()
+    except Exception:
+        return {}
+def patch_mlx_device_info() -> None:
+    """Redirect deprecated mx.metal.device_info to mx.device_info when possible."""
+    try:
+        import mlx.core as mx
+    except Exception:
+        return
+    if hasattr(mx, "device_info") and hasattr(mx, "metal") and hasattr(mx.metal, "device_info"):
+        try:
+            mx.metal.device_info = mx.device_info  # type: ignore[attr-defined]
+        except Exception:
+            pass
+def patch_mlx_lm_device_info() -> None:
+    """Patch mlx_lm call sites to use mx.device_info() instead of mx.metal.device_info()."""
+    try:
+        import mlx.core as mx
+        from mlx.utils import tree_reduce
+    except Exception:
+        return
+    if not hasattr(mx, "device_info"):
+        return
+    patch_mlx_device_info()
+    try:
+        import mlx_lm.generate as mlx_generate
+    except Exception:
+        mlx_generate = None
+    try:
+        import mlx_lm.server as mlx_server
+    except Exception:
+        mlx_server = None
+    if mlx_generate is not None and getattr(mlx_generate, "__cortex_patched__", False) is False:
+        @contextlib.contextmanager
+        def wired_limit(model: Any, streams: Optional[List[Any]] = None):
+            if not mx.metal.is_available():
+                try:
+                    yield
+                finally:
+                    pass
+                return
+            model_bytes = tree_reduce(
+                lambda acc, x: acc + x.nbytes if isinstance(x, mx.array) else acc, model, 0
+            )
+            info = _get_device_info(mx)
+            max_rec_size = info.get("max_recommended_working_set_size")
+            if max_rec_size and model_bytes > 0.9 * max_rec_size:
+                model_mb = model_bytes // 2**20
+                max_rec_mb = max_rec_size // 2**20
+                print(
+                    f"[WARNING] Generating with a model that requires {model_mb} MB "
+                    f"which is close to the maximum recommended size of {max_rec_mb} "
+                    "MB. This can be slow. See the documentation for possible work-arounds: "
+                    "https://github.com/ml-explore/mlx-lm/tree/main#large-models"
+                )
+            old_limit = None
+            if max_rec_size:
+                old_limit = mx.set_wired_limit(max_rec_size)
+            try:
+                yield
+            finally:
+                if streams is not None:
+                    for s in streams:
+                        mx.synchronize(s)
+                else:
+                    mx.synchronize()
+                if old_limit is not None:
+                    mx.set_wired_limit(old_limit)
+        mlx_generate.wired_limit = wired_limit
+        mlx_generate.__cortex_patched__ = True
+    if mlx_server is not None and getattr(mlx_server, "__cortex_patched__", False) is False:
+        def get_system_fingerprint():
+            gpu_arch = ""
+            if mx.metal.is_available():
+                info = _get_device_info(mx)
+                gpu_arch = info.get("architecture", "") if isinstance(info, dict) else ""
+            return f"{mlx_server.__version__}-{mx.__version__}-{mlx_server.platform.platform()}-{gpu_arch}"
+        mlx_server.get_system_fingerprint = get_system_fingerprint
+        mlx_server.__cortex_patched__ = True

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cortex-llm
-Version: 1.0.2
+Version: 1.0.4
 Summary: GPU-Accelerated LLM Terminal for Apple Silicon
 Home-page: https://github.com/faisalmumtaz/Cortex
 Author: Cortex Development Team

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-cortex/__init__.py,sha256=0McchQSofocWIRoBIgEmR2lhMOL4ke2WcR9O70t4V6A,2202
-cortex/__main__.py,sha256=pu1ah74AzLDHLjtZdw14OPELb0BVSacdaVcBvhOjto8,2830
+cortex/__init__.py,sha256=6KYzL3KARjSGTCZnrmxFxVlfuMUFtIwCL4cK2ekXOAs,2202
+cortex/__main__.py,sha256=I7Njt7BjGoHtPhftDoA44OyOYbwWNNaPwP_qlJSn0J4,2857
 cortex/config.py,sha256=txmpJXy3kUEKULZyu1OWb_jkNQRHZClm5ovZfCTX_Zc,13444
 cortex/conversation_manager.py,sha256=aSTdGjVttsMKIiRPzztP0tOXlqZBkWtgZDNCZGyaR-c,17177
 cortex/gpu_validator.py,sha256=un6vMQ78MWMnKWIz8n-92v9Fb4g_YXqU_E1pUPinncY,16582
-cortex/inference_engine.py,sha256=WUi5YS_sTQFayc-UJbyixHEzZlMN0-ATNdoPNEhHbbo,28543
+cortex/inference_engine.py,sha256=pcoSBw8ooqdJmQtPP8Y-DrBusf6VGWZjPRik9NLSRrg,28632
 cortex/model_downloader.py,sha256=VuPhvxq_66qKjsPjEWcLW-VmUHzOHik6LBMiGDk-cX8,4977
 cortex/model_manager.py,sha256=Blk-JA_kajJcDp-h2A4tplECijHPw8LZ8c_fbq0FGFg,100670
 cortex/fine_tuning/__init__.py,sha256=IXKQqNqN1C3mha3na35i7KI-hMnsqqrmUgV4NrPKHy0,269
@@ -14,7 +14,8 @@ cortex/fine_tuning/wizard.py,sha256=eIRUM3zTqKKATJEbQrBsaOfFfRWfY9BV5FkSAzT82QM,
 cortex/metal/__init__.py,sha256=Ycs81qVOsaYV4UJocCFGW3rPPBySMPy7eOHKzfc4Q7o,8780
 cortex/metal/gpu_validator.py,sha256=1YHKJXqicXvTwKIdSj34n1DgKoluy9yho6S1jWt1UAs,5818
 cortex/metal/memory_pool.py,sha256=g5PFQAiouQe4TyX-SVi-Di1MLysb3YBF77uR4nAEomo,34698
-cortex/metal/mlx_accelerator.py,sha256=HknYFHwFOPzAtdrV5OqxBvENX3qwNoiKC6H8lXGmJPg,25637
+cortex/metal/mlx_accelerator.py,sha256=f3tfHAaRQqc5KteXNkf7n610SxOLo2hoGj5_GgqEL2Y,25726
+cortex/metal/mlx_compat.py,sha256=oZ_RNjJzWs6h6Q3mSNK-K2--BhJwKt7d_tYt3uMN1pM,3531
 cortex/metal/mlx_converter.py,sha256=lMmIh6PqLwraWInZDLJjktBtumLHD9TYRnhiKCpilrc,26722
 cortex/metal/mps_optimizer.py,sha256=4r6dj-_KAr3vedCwwu7lR-nIaF4g4D4kkOoF2KiQ0FQ,15307
 cortex/metal/optimizer.py,sha256=9ixKj8ca1iovF-mFHYGa9_DUHcqgGyzLoP_lIRAzfMM,21996
@@ -40,9 +41,9 @@ cortex/ui/__init__.py,sha256=t3GrHJMHTVgBEKh2_qt4B9mS594V5jriTDqc3eZKMGc,3409
 cortex/ui/cli.py,sha256=ExzP56n1yV4bdA1EOqHSDFRWhpgpX0lkghq0H0FXw7Q,74661
 cortex/ui/markdown_render.py,sha256=bXt60vkNYT_jbpKeIg_1OlcrxssmdbMO7RB2E1sWw3E,5759
 cortex/ui/terminal_app.py,sha256=SF3KqcGFyZ4hpTmgX21idPzOTJLdKGkt4QdA-wwUBNE,18317
-cortex_llm-1.0.2.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
-cortex_llm-1.0.2.dist-info/METADATA,sha256=BvlqwNOXXuUXCiQU2WWQ11rH4drWMxSSzu5PV-9DlFc,10087
-cortex_llm-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cortex_llm-1.0.2.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
-cortex_llm-1.0.2.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
-cortex_llm-1.0.2.dist-info/RECORD,,
+cortex_llm-1.0.4.dist-info/licenses/LICENSE,sha256=_frJ3VsZWQGhMznZw2Tgjk7xwfAfDZRcBl43uZh8_4E,1070
+cortex_llm-1.0.4.dist-info/METADATA,sha256=rX0lVqvlXVaLNMfn3QWJH2rYSShxAiH7v6d_fWKvkYg,10087
+cortex_llm-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cortex_llm-1.0.4.dist-info/entry_points.txt,sha256=g83Nuz3iFrNdMLHxGLR2LnscdM7rdQRchuL3WGobQC8,48
+cortex_llm-1.0.4.dist-info/top_level.txt,sha256=79LAeTJJ_pMIBy3mkF7uNaN0mdBRt5tGrnne5N_iAio,7
+cortex_llm-1.0.4.dist-info/RECORD,,

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{cortex_llm-1.0.2.dist-info → cortex_llm-1.0.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

cortex-llm 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

cortex-llm 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl