PyPI - mlx-stack - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

mlx-stack 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mlx-stack
-Version: 0.2.0
+Version: 0.3.0
 Summary: CLI control plane for local LLM infrastructure on Apple Silicon
 Project-URL: Homepage, https://github.com/weklund/mlx-stack
 Project-URL: Repository, https://github.com/weklund/mlx-stack
@@ -326,6 +326,17 @@ The built-in catalog includes 15 models across 5 families:
 Each entry includes benchmark data for common Apple Silicon configurations, quality scores, and capability metadata (tool calling, thinking/reasoning, vision).
+Some models (Gemma 3, Llama 3.3) are **gated** on HuggingFace and require accepting a license before download. `mlx-stack init --accept-defaults` automatically selects non-gated models so the zero-config path works without authentication. To use gated models:
+```bash
+# 1. Accept the model license on huggingface.co
+# 2. Set your token
+export HF_TOKEN=hf_...
+# 3. Pull the gated model
+mlx-stack pull gemma3-12b
+```
 ## Architecture Details
 mlx-stack manages a **tiered local inference stack** with three layers:

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/README.md RENAMED Viewed

@@ -297,6 +297,17 @@ The built-in catalog includes 15 models across 5 families:
 Each entry includes benchmark data for common Apple Silicon configurations, quality scores, and capability metadata (tool calling, thinking/reasoning, vision).
+Some models (Gemma 3, Llama 3.3) are **gated** on HuggingFace and require accepting a license before download. `mlx-stack init --accept-defaults` automatically selects non-gated models so the zero-config path works without authentication. To use gated models:
+```bash
+# 1. Accept the model license on huggingface.co
+# 2. Set your token
+export HF_TOKEN=hf_...
+# 3. Pull the gated model
+mlx-stack pull gemma3-12b
+```
 ## Architecture Details
 mlx-stack manages a **tiered local inference stack** with three layers:

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)
+__version__ = version = '0.3.0'
+__version_tuple__ = version_tuple = (0, 3, 0)
 __commit_id__ = commit_id = None

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/cli/pull.py RENAMED Viewed

@@ -18,6 +18,7 @@ from mlx_stack.core.pull import (
     ConversionError,
     DiskSpaceError,
     DownloadError,
+    GatedModelError,
     InvalidModelError,
     PullError,
     pull_model,
@@ -80,6 +81,9 @@ def pull(model: str, quant: str | None, bench: bool, force: bool) -> None:
     except DiskSpaceError as exc:
         console.print(f"[bold red]Error:[/bold red] {exc}")
         raise SystemExit(1) from None
+    except GatedModelError as exc:
+        console.print(f"[bold red]Authentication required:[/bold red] {exc}")
+        raise SystemExit(1) from None
     except DownloadError as exc:
         console.print(f"[bold red]Download error:[/bold red] {exc}")
         raise SystemExit(1) from None

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/core/catalog.py RENAMED Viewed

@@ -127,6 +127,7 @@ class CatalogEntry:
     quality: QualityScores
     benchmarks: dict[str, BenchmarkResult]
     tags: list[str] = field(default_factory=list)
+    gated: bool = False
 # --------------------------------------------------------------------------- #
@@ -335,6 +336,7 @@ def _parse_entry(data: dict[str, Any]) -> CatalogEntry:
             quality=quality,
             benchmarks=benchmarks,
             tags=list(data.get("tags", [])),
+            gated=bool(data.get("gated", False)),
         )
     except (ValueError, TypeError) as exc:
         msg = f"Catalog entry '{model_id}': invalid top-level field value: {exc}"

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/core/pull.py RENAMED Viewed

@@ -19,6 +19,8 @@ from pathlib import Path
 from typing import Any
 from huggingface_hub import snapshot_download
+from huggingface_hub.errors import GatedRepoError
+from huggingface_hub.utils._auth import get_token
 from rich.console import Console
 from mlx_stack.core.catalog import CatalogEntry, QuantSource, get_entry_by_id, load_catalog
@@ -42,6 +44,10 @@ class DownloadError(PullError):
     """Raised when model download fails."""
+class GatedModelError(DownloadError):
+    """Raised when a gated model requires HuggingFace authentication."""
 class ConversionError(PullError):
     """Raised when mlx_lm conversion fails."""
@@ -321,6 +327,14 @@ def _run_download(
     """
     try:
         snapshot_download(repo_id=hf_repo, local_dir=str(local_dir))
+    except GatedRepoError:
+        msg = (
+            f"Access denied for {hf_repo} — this is a gated model.\n"
+            f"Your HuggingFace token does not have access.\n"
+            f"Accept the model license at: https://huggingface.co/{hf_repo}\n"
+            f"Then retry: mlx-stack pull"
+        )
+        raise GatedModelError(msg) from None
     except Exception as exc:
         msg = f"Download failed for {hf_repo}: {exc}"
         raise DownloadError(msg) from None
@@ -572,6 +586,22 @@ def pull_model(
         )
         raise InvalidModelError(msg)
+    # 1b. Pre-flight auth check for gated models
+    if entry.gated and get_token() is None:
+        msg = (
+            f"Model '{entry.name}' requires HuggingFace authentication "
+            f"(gated model).\n\n"
+            f"To download gated models:\n"
+            f"  1. Accept the model license on HuggingFace\n"
+            f"  2. Authenticate using ONE of:\n"
+            f"     - export HF_TOKEN=hf_...    "
+            f"(get a token at huggingface.co/settings/tokens)\n"
+            f"     - huggingface-cli login\n\n"
+            f"Or use a non-gated alternative:\n"
+            f"  mlx-stack models --catalog"
+        )
+        raise GatedModelError(msg)
     # 2. Determine quantization
     if quant is None:
         try:

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/core/scoring.py RENAMED Viewed

@@ -410,6 +410,7 @@ def score_and_filter(
     budget_gb: float,
     quant: str = _DEFAULT_QUANT,
     saved_benchmarks: dict[str, Any] | None = None,
+    exclude_gated: bool = False,
 ) -> list[ScoredModel]:
     """Score all catalog models and filter by memory budget.
@@ -423,6 +424,8 @@ def score_and_filter(
         budget_gb: Memory budget in GB.
         quant: Quantization level.
         saved_benchmarks: Optional saved benchmark data.
+        exclude_gated: If True, exclude gated models that require
+            HuggingFace authentication.
     Returns:
         List of ScoredModel instances within budget, sorted by score descending.
@@ -439,6 +442,9 @@ def score_and_filter(
     scored: list[ScoredModel] = []
     for entry in catalog:
+        if exclude_gated and entry.gated:
+            continue
         try:
             model = score_model(entry, profile, weights, budget_gb, quant, saved_benchmarks)
         except ScoringError:
@@ -558,6 +564,7 @@ def recommend(
     budget_gb_override: float | None = None,
     quant: str = _DEFAULT_QUANT,
     saved_benchmarks: dict[str, Any] | None = None,
+    exclude_gated: bool = False,
 ) -> RecommendationResult:
     """Generate a recommendation for the given hardware and intent.
@@ -576,6 +583,7 @@ def recommend(
             percentage-based calculation.
         quant: Default quantization level.
         saved_benchmarks: Optional saved benchmark data from bench --save.
+        exclude_gated: If True, exclude gated models from recommendations.
     Returns:
         A RecommendationResult with tier assignments and all scored models.
@@ -596,7 +604,8 @@ def recommend(
     # Score and filter
     scored = score_and_filter(
-        catalog, profile, intent, budget_gb, quant, saved_benchmarks
+        catalog, profile, intent, budget_gb, quant, saved_benchmarks,
+        exclude_gated=exclude_gated,
     )
     # Assign tiers

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/core/stack_init.py RENAMED Viewed

@@ -376,6 +376,7 @@ def run_init(
             profile=profile,
             intent=intent,
             budget_pct=budget_pct,
+            exclude_gated=True,
         )
     except ScoringError as exc:
         msg = f"Recommendation failed: {exc}"
@@ -432,6 +433,14 @@ def run_init(
                 msg = f"Cannot add model '{model_id}': {exc}"
                 raise InitError(msg) from None
+            # Warn if the model is gated (requires HuggingFace auth)
+            if entry.gated:
+                warnings.append(
+                    f"Model '{model_id}' is gated and requires HuggingFace "
+                    f"authentication. Set HF_TOKEN or run 'huggingface-cli login' "
+                    f"before pulling."
+                )
             # Warn if exceeding budget (per spec: warn, not block)
             total_memory = sum(t.model.memory_gb for t in tiers) + scored.memory_gb
             if total_memory > recommendation.memory_budget_gb:

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/data/catalog/gemma3-12b.yaml RENAMED Viewed

@@ -4,6 +4,7 @@ family: Gemma 3
 params_b: 12.0
 architecture: transformer
 min_mlx_lm_version: "0.22.0"
+gated: true
 sources:
   int4:
     hf_repo: mlx-community/gemma-3-12b-it-4bit

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/data/catalog/gemma3-27b.yaml RENAMED Viewed

@@ -4,6 +4,7 @@ family: Gemma 3
 params_b: 27.0
 architecture: transformer
 min_mlx_lm_version: "0.22.0"
+gated: true
 sources:
   int4:
     hf_repo: mlx-community/gemma-3-27b-it-4bit

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/data/catalog/gemma3-4b.yaml RENAMED Viewed

@@ -4,6 +4,7 @@ family: Gemma 3
 params_b: 4.0
 architecture: transformer
 min_mlx_lm_version: "0.22.0"
+gated: true
 sources:
   int4:
     hf_repo: mlx-community/gemma-3-4b-it-4bit

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/src/mlx_stack/data/catalog/llama3.3-8b.yaml RENAMED Viewed

@@ -4,6 +4,7 @@ family: Llama 3.3
 params_b: 8.0
 architecture: transformer
 min_mlx_lm_version: "0.22.0"
+gated: true
 sources:
   int4:
     hf_repo: mlx-community/Llama-3.3-8B-Instruct-4bit

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/tests/unit/test_catalog.py RENAMED Viewed

@@ -19,6 +19,7 @@ from mlx_stack.core.catalog import (
     CatalogError,
     QualityScores,
     QuantSource,
+    _parse_entry,
     get_entry_by_id,
     load_catalog,
     load_catalog_from_directory,
@@ -793,3 +794,47 @@ def _make_valid_entry() -> dict:
         },
         "tags": ["test"],
     }
+# =========================================================================== #
+# Gated field tests
+# =========================================================================== #
+class TestGatedField:
+    """Tests for the CatalogEntry.gated field."""
+    def test_gated_defaults_to_false(self) -> None:
+        """CatalogEntry without gated field defaults to False."""
+        data = _make_valid_entry()
+        entry = _parse_entry(data)
+        assert entry.gated is False
+    def test_gated_true_from_yaml(self) -> None:
+        """CatalogEntry with gated: true parses correctly."""
+        data = _make_valid_entry()
+        data["gated"] = True
+        entry = _parse_entry(data)
+        assert entry.gated is True
+    def test_gated_false_explicit(self) -> None:
+        """Explicit gated: false parses correctly."""
+        data = _make_valid_entry()
+        data["gated"] = False
+        entry = _parse_entry(data)
+        assert entry.gated is False
+    def test_shipped_catalog_gated_models(self) -> None:
+        """Shipped catalog has exactly 4 gated models."""
+        catalog = load_catalog()
+        gated = [e for e in catalog if e.gated]
+        gated_ids = {e.id for e in gated}
+        assert gated_ids == {"gemma3-4b", "gemma3-12b", "gemma3-27b", "llama3.3-8b"}
+    def test_shipped_catalog_non_gated_models(self) -> None:
+        """Shipped catalog non-gated models all have gated=False."""
+        catalog = load_catalog()
+        non_gated = [e for e in catalog if not e.gated]
+        assert len(non_gated) == len(catalog) - 4
+        for entry in non_gated:
+            assert entry.gated is False

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/tests/unit/test_cli_init.py RENAMED Viewed

@@ -98,6 +98,7 @@ def _make_entry(
     benchmarks: dict[str, BenchmarkResult] | None = None,
     tags: list[str] | None = None,
     memory_gb: float = 5.5,
+    gated: bool = False,
 ) -> CatalogEntry:
     """Create a CatalogEntry for testing."""
     if benchmarks is None:
@@ -130,6 +131,7 @@ def _make_entry(
         ),
         benchmarks=benchmarks,
         tags=tags or [],
+        gated=gated,
     )
@@ -1192,3 +1194,64 @@ class TestTotalEstimatedMemory:
         assert result["total_memory_gb"] > 0
         # Total memory should be reasonable (less than total system memory)
         assert result["total_memory_gb"] < profile.memory_gb
+# =========================================================================== #
+# Gated model exclusion tests
+# =========================================================================== #
+class TestGatedModelExclusion:
+    """Tests that gated models are excluded from default init."""
+    def test_init_excludes_gated_models(self, mlx_stack_home: Path) -> None:
+        """Default init excludes gated models from tier assignments."""
+        profile = _make_profile()
+        _write_profile(mlx_stack_home, profile)
+        # Create catalog where the best model is gated
+        catalog = [
+            _make_entry(
+                model_id="gated-best",
+                name="Gated Best",
+                quality_overall=99,
+                gated=True,
+            ),
+            _make_entry(
+                model_id="open-good",
+                name="Open Good",
+                quality_overall=70,
+                gated=False,
+            ),
+        ]
+        with patch("mlx_stack.core.stack_init.load_catalog", return_value=catalog), \
+             patch("mlx_stack.core.stack_init.load_profile", return_value=profile):
+            result = run_init(intent="balanced", force=True)
+        tier_model_ids = {t["model"] for t in result["stack"]["tiers"]}
+        assert "gated-best" not in tier_model_ids
+        assert "open-good" in tier_model_ids
+    def test_add_gated_model_warns(self, mlx_stack_home: Path) -> None:
+        """Adding a gated model via --add produces a warning."""
+        profile = _make_profile()
+        _write_profile(mlx_stack_home, profile)
+        catalog = [
+            _make_entry(model_id="open-model", name="Open Model"),
+            _make_entry(model_id="gated-model", name="Gated Model", gated=True),
+        ]
+        with patch("mlx_stack.core.stack_init.load_catalog", return_value=catalog), \
+             patch("mlx_stack.core.stack_init.load_profile", return_value=profile):
+            result = run_init(
+                intent="balanced",
+                add_models=["gated-model"],
+                force=True,
+            )
+        warnings = result["warnings"]
+        gated_warnings = [w for w in warnings if "gated" in w.lower()]
+        assert len(gated_warnings) >= 1
+        assert "HuggingFace authentication" in gated_warnings[0]

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/tests/unit/test_cli_pull.py RENAMED Viewed

@@ -35,6 +35,7 @@ from mlx_stack.core.pull import (
     ConversionError,
     DiskSpaceError,
     DownloadError,
+    GatedModelError,
     InvalidModelError,
     ModelInventoryEntry,
     PullError,
@@ -65,6 +66,7 @@ def _make_entry(
     disk_size_gb: float = 4.5,
     disk_size_gb_int8: float = 8.5,
     disk_size_gb_bf16: float = 16.0,
+    gated: bool = False,
 ) -> CatalogEntry:
     """Create a CatalogEntry for testing."""
     return CatalogEntry(
@@ -101,6 +103,7 @@ def _make_entry(
             "m4-max-128": BenchmarkResult(prompt_tps=140.0, gen_tps=77.0, memory_gb=5.5),
         },
         tags=["balanced", "agent-ready"],
+        gated=gated,
     )
@@ -1244,3 +1247,103 @@ class TestPullModelsIntegration:
         assert result.exit_code == 0
         # The model directory or catalog name should appear
         assert "qwen3.5-8b-4bit" in result.output or "Qwen 3.5 8B" in result.output
+# =========================================================================== #
+# Gated model handling tests
+# =========================================================================== #
+class TestGatedModelHandling:
+    """Tests for gated model pre-flight check and error handling."""
+    @patch("mlx_stack.core.pull.get_token", return_value=None)
+    def test_gated_model_without_token_raises(
+        self,
+        mock_token: MagicMock,
+        mlx_stack_home: Path,
+    ) -> None:
+        """Gated model without HF token raises GatedModelError."""
+        import pytest
+        catalog = [_make_entry(gated=True)]
+        with pytest.raises(GatedModelError, match="requires HuggingFace authentication"):
+            pull_model("qwen3.5-8b", quant="int4", catalog=catalog)
+    @patch("mlx_stack.core.pull.download_model")
+    @patch("mlx_stack.core.pull.check_disk_space", return_value=(True, 100.0))
+    @patch("mlx_stack.core.pull.get_token", return_value="hf_test_token")
+    def test_gated_model_with_token_proceeds(
+        self,
+        mock_token: MagicMock,
+        mock_space: MagicMock,
+        mock_download: MagicMock,
+        mlx_stack_home: Path,
+    ) -> None:
+        """Gated model with valid token proceeds to download."""
+        catalog = [_make_entry(gated=True)]
+        result = pull_model("qwen3.5-8b", quant="int4", catalog=catalog)
+        assert result.already_existed is False
+        mock_download.assert_called_once()
+    @patch("mlx_stack.core.pull.get_token", return_value=None)
+    def test_non_gated_model_skips_token_check(
+        self,
+        mock_token: MagicMock,
+        mlx_stack_home: Path,
+    ) -> None:
+        """Non-gated model does not check for token."""
+        catalog = [_make_entry(gated=False)]
+        with patch("mlx_stack.core.pull.download_model"):
+            with patch("mlx_stack.core.pull.check_disk_space", return_value=(True, 100.0)):
+                pull_model("qwen3.5-8b", quant="int4", catalog=catalog)
+        # get_token was not called (non-gated path doesn't reach the check)
+        mock_token.assert_not_called()
+    @patch("mlx_stack.core.pull.snapshot_download")
+    def test_gated_repo_error_caught(
+        self,
+        mock_snapshot: MagicMock,
+        tmp_path: Path,
+    ) -> None:
+        """GatedRepoError from snapshot_download becomes GatedModelError."""
+        from io import StringIO
+        from unittest.mock import MagicMock as Mock
+        import pytest
+        from huggingface_hub.errors import GatedRepoError as HfGatedRepoError
+        from rich.console import Console
+        from mlx_stack.core.pull import _run_download
+        # GatedRepoError requires a response object
+        mock_response = Mock()
+        mock_response.status_code = 403
+        mock_response.headers = {}
+        mock_response.url = "https://huggingface.co/test/repo"
+        mock_snapshot.side_effect = HfGatedRepoError(
+            "gated repo", response=mock_response
+        )
+        local_dir = tmp_path / "model"
+        local_dir.mkdir()
+        console = Console(file=StringIO())
+        with pytest.raises(GatedModelError, match="gated model"):
+            _run_download("test/repo", local_dir, console)
+    @patch("mlx_stack.core.pull.get_token", return_value=None)
+    @patch("mlx_stack.core.pull.load_catalog")
+    def test_cli_gated_error_shows_auth_required(
+        self,
+        mock_catalog: MagicMock,
+        mock_token: MagicMock,
+        mlx_stack_home: Path,
+    ) -> None:
+        """CLI shows 'Authentication required' for gated model errors."""
+        mock_catalog.return_value = [_make_entry(gated=True)]
+        runner = CliRunner()
+        result = runner.invoke(cli, ["pull", "qwen3.5-8b"])
+        assert result.exit_code == 1
+        assert "Authentication required" in result.output

{mlx_stack-0.2.0 → mlx_stack-0.3.0}/tests/unit/test_scoring.py RENAMED Viewed

@@ -69,6 +69,7 @@ def _make_entry(
     thinking: bool = False,
     benchmarks: dict[str, BenchmarkResult] | None = None,
     tags: list[str] | None = None,
+    gated: bool = False,
 ) -> CatalogEntry:
     """Helper to create a CatalogEntry for testing."""
     if benchmarks is None:
@@ -103,6 +104,7 @@ def _make_entry(
         ),
         benchmarks=benchmarks,
         tags=tags or ["balanced"],
+        gated=gated,
     )
@@ -1401,3 +1403,54 @@ class TestIntentDifferentiation:
         # The balanced-model should win because its composite score is higher
         # even though slow-quality has higher raw quality
         assert standard.model.entry.id == "balanced-model"
+# =========================================================================== #
+# Gated model filtering tests
+# =========================================================================== #
+class TestExcludeGated:
+    """Tests for exclude_gated parameter in score_and_filter."""
+    def test_exclude_gated_filters_gated_models(self, m4_max_128_profile: HardwareProfile) -> None:
+        """Gated models are excluded when exclude_gated=True."""
+        open_model = _make_entry(model_id="open-model", name="Open Model")
+        gated_model = _make_entry(model_id="gated-model", name="Gated Model", gated=True)
+        scored = score_and_filter(
+            [open_model, gated_model], m4_max_128_profile, "balanced", 51.2,
+            exclude_gated=True,
+        )
+        scored_ids = {m.entry.id for m in scored}
+        assert "open-model" in scored_ids
+        assert "gated-model" not in scored_ids
+    def test_exclude_gated_false_includes_all(self, m4_max_128_profile: HardwareProfile) -> None:
+        """All models included when exclude_gated=False (default)."""
+        open_model = _make_entry(model_id="open-model", name="Open Model")
+        gated_model = _make_entry(model_id="gated-model", name="Gated Model", gated=True)
+        scored = score_and_filter(
+            [open_model, gated_model], m4_max_128_profile, "balanced", 51.2,
+            exclude_gated=False,
+        )
+        scored_ids = {m.entry.id for m in scored}
+        assert "open-model" in scored_ids
+        assert "gated-model" in scored_ids
+    def test_recommend_exclude_gated(self, m4_max_128_profile: HardwareProfile) -> None:
+        """Gated models excluded from tier assignments via recommend()."""
+        open_model = _make_entry(model_id="open-model", name="Open Model")
+        gated_model = _make_entry(
+            model_id="gated-model", name="Gated Model",
+            quality_overall=99, gated=True,
+        )
+        result = recommend(
+            [open_model, gated_model], m4_max_128_profile,
+            exclude_gated=True,
+        )
+        tier_ids = {t.model.entry.id for t in result.tiers}
+        assert "gated-model" not in tier_ids
+        assert "open-model" in tier_ids