mlx-stack 0.3.4__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlx_stack-0.3.5/.release-please-manifest.json +3 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/CHANGELOG.md +17 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/Makefile +3 -2
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/PKG-INFO +1 -1
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/pyproject.toml +34 -1
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/_version.py +2 -2
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/bench.py +7 -12
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/config.py +1 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/init.py +3 -7
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/install.py +1 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/logs.py +3 -5
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/models.py +5 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/profile.py +2 -6
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/pull.py +4 -11
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/recommend.py +8 -21
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/setup.py +29 -32
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/up.py +4 -12
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/watch.py +6 -14
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/benchmark.py +14 -25
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/catalog.py +14 -35
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/config.py +10 -8
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/deps.py +5 -16
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/discovery.py +13 -11
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/hardware.py +1 -1
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/launchd.py +6 -19
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/litellm_gen.py +1 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/log_rotation.py +4 -9
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/log_viewer.py +6 -9
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/models.py +36 -35
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/onboarding.py +55 -48
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/process.py +9 -20
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/pull.py +11 -24
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/scoring.py +29 -21
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/stack_down.py +9 -9
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/stack_init.py +17 -16
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/stack_status.py +29 -24
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/stack_up.py +100 -94
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/watchdog.py +6 -11
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/conftest.py +2 -2
- mlx_stack-0.3.5/tests/factories.py +298 -0
- mlx_stack-0.3.5/tests/fakes.py +190 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/conftest.py +27 -24
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/report.py +3 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_catalog_validation.py +8 -11
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_harness_compatibility.py +8 -12
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_inference_e2e.py +8 -15
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_launchd_e2e.py +12 -28
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_model_smoke.py +2 -5
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/test_stack_integration.py +7 -16
- mlx_stack-0.3.5/tests/unit/conftest.py +106 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_benchmark.py +48 -46
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_catalog.py +27 -15
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli.py +8 -4
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_bench.py +58 -25
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_down.py +195 -142
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_init.py +333 -273
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_install.py +14 -38
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_logs.py +32 -47
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_models.py +104 -146
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_profile.py +28 -84
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_pull.py +289 -112
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_recommend.py +214 -186
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_setup.py +51 -36
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_status.py +176 -161
- mlx_stack-0.3.5/tests/unit/test_cli_up.py +1148 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_watch.py +14 -23
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_config.py +4 -7
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cross_area.py +86 -150
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_deps.py +16 -35
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_discovery.py +4 -2
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_hardware.py +5 -8
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_launchd.py +14 -30
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_lifecycle_fixes.py +66 -168
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_litellm_gen.py +3 -12
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_log_rotation.py +27 -27
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_log_viewer.py +51 -68
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_models.py +17 -49
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_onboarding.py +109 -37
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_ops_cross_area.py +139 -95
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_paths.py +1 -3
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_process.py +23 -20
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_robustness_fixes.py +56 -86
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_scoring.py +139 -260
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_watchdog.py +199 -98
- mlx_stack-0.3.4/.release-please-manifest.json +0 -3
- mlx_stack-0.3.4/tests/unit/test_cli_up.py +0 -1584
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/init.sh +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/library/architecture.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/library/environment.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/library/user-testing.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/services.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/settings.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/skills/cli-feature/SKILL.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/configuration-management.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/dependency-management.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/fix-catalog-errors-and-families.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/fix-deps-binary-and-ansi.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/fix-scaffolding-data-home.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/hardware-detection.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/model-catalog.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/reviews/project-scaffolding.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/user-testing/flows/foundation-config-basic.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/user-testing/flows/foundation-config-deps.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/user-testing/flows/foundation-profile-catalog.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/user-testing/flows/foundation-setup-profile-core.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/foundation/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/down-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-preflight-and-readonly.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-process-robustness.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-typecheck.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/process-management.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/status-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/reviews/up-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g1-deps-up-basics.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g2-up-startup.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g3-up-resilience.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g4-down.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g5-status.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r1-g6-cross.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r2-g1-fixes.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/flows/r2-g2-cross-blockers.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/lifecycle/user-testing/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/scrutiny/reviews/fix-cross-area-test-rigor.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/scrutiny/reviews/misc-cross-area-validation.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/user-testing/flows/r1-g1-cross-flows.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/user-testing/flows/r2-g4-cross-port5050.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/misc-cross-area/user-testing/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/fix-ops-lint-errors.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/fix-ops-scrutiny-issues.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/fix-ops-typecheck-errors.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/launchd-integration.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/log-rotation.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/logs-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/ops-cross-area-validation.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/reviews/watchdog-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/flows/g1-log.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/flows/g2-logs-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/flows/g3-watch.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/flows/g4-launchd.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/flows/g5-cross-ops.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/ops/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/reviews/community-docs.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/reviews/developing-guide.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/reviews/fix-public-ready-scrutiny.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/reviews/github-actions-ci.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/reviews/readme-rewrite.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/public-ready/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/fix-init-and-models-issues.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/fix-recommendation-scoring-issues.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/fix-scoring-lint.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/init-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/models-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/recommend-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/reviews/scoring-engine.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g1-recommend-budget-ranking.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g2-recommend-output-integration.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g3-init-core-routing.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g4-init-cloud-overwrite.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g5-init-hardware-summary.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g6-models-local.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/g7-models-catalog.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/r2-g1-recommend.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/r2-g2-models-catalog-filters.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/flows/r2-g3-cross-012.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/recommendation/user-testing/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/scrutiny/reviews/bench-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/scrutiny/reviews/fix-tooling-scrutiny-issues.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/scrutiny/reviews/pull-command.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/scrutiny/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/scrutiny/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/g1-pull-core.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/g2-pull-errors.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/g3-bench-core.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/g4-bench-advanced.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/r2-g1-pull.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/r2-g2-bench.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/r3-g1-pull.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/r3-g2-bench.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/flows/r4-g1-bench.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/synthesis.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/synthesis.round1.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/synthesis.round2.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.factory/validation/tooling/user-testing/synthesis.round3.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/release.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/workflows/ci.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/workflows/integration-nightly.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/workflows/integration-prerelease.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/workflows/publish.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.github/workflows/release-please.yml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/.gitignore +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/CODE_OF_CONDUCT.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/CONTRIBUTING.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/DEVELOPING.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/LICENSE +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/README.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/SECURITY.md +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/release-please-config.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/down.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/main.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/cli/status.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/core/paths.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/benchmark_data.json +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/deepseek-r1-32b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/deepseek-r1-8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/gemma3-12b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/gemma3-27b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/gemma3-4b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/llama3.3-8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/nemotron-49b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/nemotron-8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3-8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-0.8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-14b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-32b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-3b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-72b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/data/catalog/qwen3.5-8b.yaml +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/py.typed +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/src/mlx_stack/utils/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/integration/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/__init__.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_cli_config.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/tests/unit/test_data_dir.py +0 -0
- {mlx_stack-0.3.4 → mlx_stack-0.3.5}/uv.lock +0 -0
|
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
+
## [0.3.5](https://github.com/weklund/mlx-stack/compare/v0.3.4...v0.3.5) (2026-04-04)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* expand ruff lint rules with tier 1+2 quality rulesets ([#22](https://github.com/weklund/mlx-stack/issues/22)) ([75490f6](https://github.com/weklund/mlx-stack/commit/75490f6817a87a6b63818fa1f7c1660e59766ba3))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
### Refactors
|
|
16
|
+
|
|
17
|
+
* **tests:** replace brittle mock-heavy tests with behavioral tests and shared factories ([#32](https://github.com/weklund/mlx-stack/issues/32)) ([9af6078](https://github.com/weklund/mlx-stack/commit/9af60781e14c1466219ee2644928db9c9a39041e))
|
|
18
|
+
- `FakeServiceLayer` replaces 10-deep `@patch` stacks in `TestRunUp`
|
|
19
|
+
- Consolidate ~50 duplicate helpers into `tests/factories.py`
|
|
20
|
+
- AAA comments (`# Arrange`, `# Act`, `# Assert`) across 17 test files
|
|
21
|
+
- `make lint` now includes pyright for shift-left type checking
|
|
22
|
+
- Net: -577 lines, 1,481 tests pass, 73% reduction in `@patch` usage
|
|
23
|
+
|
|
7
24
|
## [0.3.4](https://github.com/weklund/mlx-stack/compare/v0.3.3...v0.3.4) (2026-04-03)
|
|
8
25
|
|
|
9
26
|
|
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
install:
|
|
5
5
|
uv sync --dev
|
|
6
6
|
|
|
7
|
-
## Lint source and tests
|
|
7
|
+
## Lint source and tests (ruff + pyright)
|
|
8
8
|
lint:
|
|
9
9
|
uv run ruff check src/ tests/
|
|
10
|
+
uv run python -m pyright
|
|
10
11
|
|
|
11
|
-
## Run type checker
|
|
12
|
+
## Run type checker only (alias kept for CI compatibility)
|
|
12
13
|
typecheck:
|
|
13
14
|
uv run python -m pyright
|
|
14
15
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mlx-stack
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Summary: CLI control plane for local LLM infrastructure on Apple Silicon
|
|
5
5
|
Project-URL: Homepage, https://github.com/weklund/mlx-stack
|
|
6
6
|
Project-URL: Repository, https://github.com/weklund/mlx-stack
|
|
@@ -67,7 +67,40 @@ line-length = 100
|
|
|
67
67
|
src = ["src", "tests"]
|
|
68
68
|
|
|
69
69
|
[tool.ruff.lint]
|
|
70
|
-
select = [
|
|
70
|
+
select = [
|
|
71
|
+
# Tier 1 — high value, very safe
|
|
72
|
+
"E", # pycodestyle errors
|
|
73
|
+
"F", # pyflakes
|
|
74
|
+
"I", # isort
|
|
75
|
+
"W", # pycodestyle warnings
|
|
76
|
+
"UP", # pyupgrade — modern Python syntax
|
|
77
|
+
"B", # bugbear — common bug patterns
|
|
78
|
+
"SIM", # simplify — reduce complexity
|
|
79
|
+
"C4", # flake8-comprehensions
|
|
80
|
+
"RUF", # Ruff-specific rules
|
|
81
|
+
# Tier 2 — strong value, minor tuning
|
|
82
|
+
"PIE", # misc cleanup
|
|
83
|
+
"RET", # return simplification
|
|
84
|
+
"PERF", # performance anti-patterns
|
|
85
|
+
"PT", # pytest style
|
|
86
|
+
# "C90", # mccabe complexity — enable after refactoring complex functions
|
|
87
|
+
"FURB", # modernization
|
|
88
|
+
"FLY", # f-string conversion
|
|
89
|
+
]
|
|
90
|
+
ignore = [
|
|
91
|
+
"E501", # line length (formatter handles this)
|
|
92
|
+
"SIM108", # ternary operator (opinionated)
|
|
93
|
+
"SIM117", # nested with statements (clearer in test mocking patterns)
|
|
94
|
+
"PT018", # composite assertions (splitting weakens error messages)
|
|
95
|
+
"PT019", # fixture without value (usefixtures less readable)
|
|
96
|
+
"PT017", # assert in except (valid test pattern)
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
[tool.ruff.lint.mccabe]
|
|
100
|
+
max-complexity = 10
|
|
101
|
+
|
|
102
|
+
[tool.ruff.lint.per-file-ignores]
|
|
103
|
+
"src/mlx_stack/_version.py" = ["RUF022"] # auto-generated by hatch-vcs
|
|
71
104
|
|
|
72
105
|
[tool.pyright]
|
|
73
106
|
pythonVersion = "3.13"
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.3.4'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 3, 4)
|
|
21
|
+
__version__ = version = '0.3.5'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 3, 5)
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -168,23 +168,16 @@ def _display_results(result: BenchmarkResult_, out: Console, save: bool = False)
|
|
|
168
168
|
out.print(Text("Tool Calling", style="bold cyan"))
|
|
169
169
|
tc = result.tool_call_result
|
|
170
170
|
if tc.success:
|
|
171
|
-
out.print(
|
|
172
|
-
f" [green]✓ Valid tool call[/green] — "
|
|
173
|
-
f"round-trip: {tc.round_trip_time:.2f}s"
|
|
174
|
-
)
|
|
171
|
+
out.print(f" [green]✓ Valid tool call[/green] — round-trip: {tc.round_trip_time:.2f}s")
|
|
175
172
|
else:
|
|
176
|
-
out.print(
|
|
177
|
-
f" [red]✗ Tool call failed[/red] — {tc.error}"
|
|
178
|
-
)
|
|
173
|
+
out.print(f" [red]✗ Tool call failed[/red] — {tc.error}")
|
|
179
174
|
out.print()
|
|
180
175
|
elif not result.tool_call_result:
|
|
181
176
|
# Check if model supports tool calling from entry
|
|
182
177
|
if not result.catalog_data_available:
|
|
183
178
|
pass # Skip silently if no catalog data
|
|
184
179
|
else:
|
|
185
|
-
out.print(
|
|
186
|
-
"[dim]Tool calling: skipped (model does not support tool calling)[/dim]"
|
|
187
|
-
)
|
|
180
|
+
out.print("[dim]Tool calling: skipped (model does not support tool calling)[/dim]")
|
|
188
181
|
out.print()
|
|
189
182
|
|
|
190
183
|
# Iteration details
|
|
@@ -212,6 +205,8 @@ def _display_results(result: BenchmarkResult_, out: Console, save: bool = False)
|
|
|
212
205
|
|
|
213
206
|
# Save confirmation
|
|
214
207
|
if save:
|
|
215
|
-
out.print(
|
|
216
|
-
|
|
208
|
+
out.print(
|
|
209
|
+
"[green]✓ Results saved.[/green] "
|
|
210
|
+
"These will be used by 'recommend' and 'init' for scoring."
|
|
211
|
+
)
|
|
217
212
|
out.print()
|
|
@@ -138,9 +138,7 @@ def config_reset(yes: bool, force: bool) -> None:
|
|
|
138
138
|
# Check if stdin is a TTY for interactive confirmation
|
|
139
139
|
try:
|
|
140
140
|
if click.get_text_stream("stdin").isatty():
|
|
141
|
-
confirmed = click.confirm(
|
|
142
|
-
"Reset all configuration to defaults?", default=False
|
|
143
|
-
)
|
|
141
|
+
confirmed = click.confirm("Reset all configuration to defaults?", default=False)
|
|
144
142
|
else:
|
|
145
143
|
console.print(
|
|
146
144
|
"[bold red]Error:[/bold red] Reset requires --yes or --force flag "
|
|
@@ -64,13 +64,10 @@ def _display_summary(result: dict) -> None:
|
|
|
64
64
|
budget_gb = result["memory_budget_gb"]
|
|
65
65
|
total_memory_gb = result.get("total_memory_gb", 0.0)
|
|
66
66
|
out.print(
|
|
67
|
-
f"[dim]Hardware: {profile.chip} ({profile.memory_gb} GB) · "
|
|
68
|
-
f"Budget: {budget_gb:.1f} GB[/dim]"
|
|
67
|
+
f"[dim]Hardware: {profile.chip} ({profile.memory_gb} GB) · Budget: {budget_gb:.1f} GB[/dim]"
|
|
69
68
|
)
|
|
70
69
|
if total_memory_gb > 0:
|
|
71
|
-
out.print(
|
|
72
|
-
f"[dim]Total estimated memory: {total_memory_gb:.1f} GB[/dim]"
|
|
73
|
-
)
|
|
70
|
+
out.print(f"[dim]Total estimated memory: {total_memory_gb:.1f} GB[/dim]")
|
|
74
71
|
|
|
75
72
|
# Warnings (e.g., memory budget exceeded with --add)
|
|
76
73
|
init_warnings = result.get("warnings", [])
|
|
@@ -83,8 +80,7 @@ def _display_summary(result: dict) -> None:
|
|
|
83
80
|
if stack.get("cloud_fallback"):
|
|
84
81
|
out.print()
|
|
85
82
|
out.print(
|
|
86
|
-
"[bold green]☁ Cloud Fallback[/bold green] "
|
|
87
|
-
"Premium tier via OpenRouter configured"
|
|
83
|
+
"[bold green]☁ Cloud Fallback[/bold green] Premium tier via OpenRouter configured"
|
|
88
84
|
)
|
|
89
85
|
|
|
90
86
|
# Missing models warning
|
|
@@ -42,9 +42,7 @@ def _display_status(status: AgentStatus) -> None:
|
|
|
42
42
|
if not status.installed:
|
|
43
43
|
out.print(Text("Status: not installed", style="dim"))
|
|
44
44
|
elif status.running and status.pid is not None:
|
|
45
|
-
out.print(
|
|
46
|
-
Text(f"Status: installed and running (PID {status.pid})", style="green")
|
|
47
|
-
)
|
|
45
|
+
out.print(Text(f"Status: installed and running (PID {status.pid})", style="green"))
|
|
48
46
|
else:
|
|
49
47
|
out.print(Text("Status: installed but not running", style="yellow"))
|
|
50
48
|
|
|
@@ -6,6 +6,7 @@ trigger on-demand rotation, and view archived logs.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
import contextlib
|
|
9
10
|
import sys
|
|
10
11
|
|
|
11
12
|
import click
|
|
@@ -78,7 +79,7 @@ def _display_rotation_results(results: list) -> None:
|
|
|
78
79
|
out.print(f"[green]✓[/green] {result.service}: rotated")
|
|
79
80
|
any_rotated = True
|
|
80
81
|
else:
|
|
81
|
-
out.print(f"[dim]
|
|
82
|
+
out.print(f"[dim]-[/dim] {result.service}: no rotation needed")
|
|
82
83
|
|
|
83
84
|
if not results:
|
|
84
85
|
out.print(Text("No log files found to rotate.", style="yellow"))
|
|
@@ -213,11 +214,8 @@ def logs(
|
|
|
213
214
|
# Handle --follow mode
|
|
214
215
|
if follow:
|
|
215
216
|
num = tail_lines if tail_lines is not None else DEFAULT_TAIL_LINES
|
|
216
|
-
|
|
217
|
+
with contextlib.suppress(KeyboardInterrupt):
|
|
217
218
|
follow_log(log_path, num_lines=num, output_callback=click.echo)
|
|
218
|
-
except KeyboardInterrupt:
|
|
219
|
-
# Belt-and-suspenders: ensure clean exit
|
|
220
|
-
pass
|
|
221
219
|
return
|
|
222
220
|
|
|
223
221
|
# Default: show tail of log
|
|
@@ -80,7 +80,7 @@ def _display_local_models() -> None:
|
|
|
80
80
|
indicator_style = "bold green" if model.is_active else ""
|
|
81
81
|
|
|
82
82
|
# Display name: prefer catalog name, fall back to directory name
|
|
83
|
-
display_name = model.catalog_name
|
|
83
|
+
display_name = model.catalog_name or model.name
|
|
84
84
|
|
|
85
85
|
# Size
|
|
86
86
|
size_str = format_size(model.disk_size_bytes)
|
|
@@ -217,7 +217,7 @@ def _display_catalog(
|
|
|
217
217
|
local_style = "bold green" if cm.is_local else ""
|
|
218
218
|
|
|
219
219
|
# Parameters
|
|
220
|
-
params_str = f"{cm.params_b:.1f}B"
|
|
220
|
+
params_str = f"{cm.params_b:.1f}B"
|
|
221
221
|
|
|
222
222
|
# Quantizations
|
|
223
223
|
quants_str = ", ".join(cm.quants)
|
|
@@ -270,7 +270,9 @@ def _display_catalog(
|
|
|
270
270
|
@click.option("--family", default=None, help="Filter catalog by model family (e.g., 'qwen3.5').")
|
|
271
271
|
@click.option("--tag", default=None, help="Filter catalog by tag (e.g., 'agent-ready').")
|
|
272
272
|
@click.option(
|
|
273
|
-
"--tool-calling",
|
|
273
|
+
"--tool-calling",
|
|
274
|
+
"tool_calling",
|
|
275
|
+
is_flag=True,
|
|
274
276
|
help="Filter catalog to tool-calling-capable models only.",
|
|
275
277
|
)
|
|
276
278
|
def models(
|
|
@@ -52,12 +52,8 @@ def profile() -> None:
|
|
|
52
52
|
|
|
53
53
|
if hw.is_estimate:
|
|
54
54
|
out.print()
|
|
55
|
-
out.print(
|
|
56
|
-
|
|
57
|
-
)
|
|
58
|
-
out.print(
|
|
59
|
-
" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
|
|
60
|
-
)
|
|
55
|
+
out.print("[yellow]⚠ Bandwidth is estimated for unknown chip.[/yellow]")
|
|
56
|
+
out.print(" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements.")
|
|
61
57
|
|
|
62
58
|
out.print()
|
|
63
59
|
from mlx_stack.core.paths import get_profile_path
|
|
@@ -115,20 +115,13 @@ def _run_post_download_bench(model_id: str, quant: str, out: Console) -> None:
|
|
|
115
115
|
from mlx_stack.core.benchmark import BenchmarkError, run_benchmark
|
|
116
116
|
|
|
117
117
|
result = run_benchmark(target=model_id, save=True)
|
|
118
|
-
out.print(
|
|
119
|
-
|
|
120
|
-
)
|
|
121
|
-
out.print(
|
|
122
|
-
f" Gen TPS: {result.gen_tps_mean:.1f} ± {result.gen_tps_std:.1f} tok/s"
|
|
123
|
-
)
|
|
118
|
+
out.print(f" Prompt TPS: {result.prompt_tps_mean:.1f} ± {result.prompt_tps_std:.1f} tok/s")
|
|
119
|
+
out.print(f" Gen TPS: {result.gen_tps_mean:.1f} ± {result.gen_tps_std:.1f} tok/s")
|
|
124
120
|
out.print()
|
|
125
|
-
out.print(
|
|
126
|
-
"[dim]Results saved for use by 'recommend' and 'init' scoring.[/dim]"
|
|
127
|
-
)
|
|
121
|
+
out.print("[dim]Results saved for use by 'recommend' and 'init' scoring.[/dim]")
|
|
128
122
|
except BenchmarkError as exc:
|
|
129
123
|
out.print(
|
|
130
|
-
f"[yellow]Benchmark failed: {exc}[/yellow]\
|
|
131
|
-
f"Run 'mlx-stack bench {model_id}' to retry."
|
|
124
|
+
f"[yellow]Benchmark failed: {exc}[/yellow]\nRun 'mlx-stack bench {model_id}' to retry."
|
|
132
125
|
)
|
|
133
126
|
except Exception as exc:
|
|
134
127
|
out.print(
|
|
@@ -67,9 +67,7 @@ def parse_budget(raw: str) -> float:
|
|
|
67
67
|
|
|
68
68
|
value = float(match.group(1))
|
|
69
69
|
if value <= 0:
|
|
70
|
-
msg =
|
|
71
|
-
f"Invalid budget '{raw}'. Budget must be a positive value."
|
|
72
|
-
)
|
|
70
|
+
msg = f"Invalid budget '{raw}'. Budget must be a positive value."
|
|
73
71
|
raise click.BadParameter(msg, param_hint="'--budget'")
|
|
74
72
|
|
|
75
73
|
return value
|
|
@@ -96,8 +94,7 @@ def _resolve_profile() -> HardwareProfile:
|
|
|
96
94
|
# Auto-detect (in-memory only — recommend is display-only, no file writes)
|
|
97
95
|
console.print("[dim]No saved profile found — detecting hardware...[/dim]")
|
|
98
96
|
try:
|
|
99
|
-
|
|
100
|
-
return profile
|
|
97
|
+
return detect_hardware()
|
|
101
98
|
except HardwareError as exc:
|
|
102
99
|
console.print(f"[bold red]Error:[/bold red] {exc}")
|
|
103
100
|
raise SystemExit(1) from None
|
|
@@ -206,12 +203,8 @@ def _display_tier_table(result: RecommendationResult) -> None:
|
|
|
206
203
|
has_estimates = any(t.model.is_estimated for t in result.tiers)
|
|
207
204
|
if has_estimates:
|
|
208
205
|
out.print()
|
|
209
|
-
out.print(
|
|
210
|
-
|
|
211
|
-
)
|
|
212
|
-
out.print(
|
|
213
|
-
" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
|
|
214
|
-
)
|
|
206
|
+
out.print("[yellow]⚠ Some performance values are estimated from bandwidth ratio.[/yellow]")
|
|
207
|
+
out.print(" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements.")
|
|
215
208
|
|
|
216
209
|
out.print()
|
|
217
210
|
out.print("[dim]This is a recommendation only — no files were written.[/dim]")
|
|
@@ -268,20 +261,15 @@ def _display_all_models(result: RecommendationResult) -> None:
|
|
|
268
261
|
if openrouter_key:
|
|
269
262
|
out.print()
|
|
270
263
|
out.print(
|
|
271
|
-
"[bold green]☁ Cloud Fallback[/bold green] "
|
|
272
|
-
"Premium tier via OpenRouter also available."
|
|
264
|
+
"[bold green]☁ Cloud Fallback[/bold green] Premium tier via OpenRouter also available."
|
|
273
265
|
)
|
|
274
266
|
|
|
275
267
|
# Estimated warning
|
|
276
268
|
has_estimates = any(m.is_estimated for m in result.all_scored)
|
|
277
269
|
if has_estimates:
|
|
278
270
|
out.print()
|
|
279
|
-
out.print(
|
|
280
|
-
|
|
281
|
-
)
|
|
282
|
-
out.print(
|
|
283
|
-
" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements."
|
|
284
|
-
)
|
|
271
|
+
out.print("[yellow]⚠ Some performance values are estimated from bandwidth ratio.[/yellow]")
|
|
272
|
+
out.print(" Run [bold]mlx-stack bench --save[/bold] to calibrate with real measurements.")
|
|
285
273
|
|
|
286
274
|
out.print()
|
|
287
275
|
out.print("[dim]This is a recommendation only — no files were written.[/dim]")
|
|
@@ -329,8 +317,7 @@ def recommend(budget: str | None, intent: str | None, show_all: bool) -> None:
|
|
|
329
317
|
elif intent not in VALID_INTENTS:
|
|
330
318
|
valid = ", ".join(sorted(VALID_INTENTS))
|
|
331
319
|
console.print(
|
|
332
|
-
f"[bold red]Error:[/bold red] Invalid intent '{intent}'. "
|
|
333
|
-
f"Valid intents: {valid}"
|
|
320
|
+
f"[bold red]Error:[/bold red] Invalid intent '{intent}'. Valid intents: {valid}"
|
|
334
321
|
)
|
|
335
322
|
raise SystemExit(1)
|
|
336
323
|
|
|
@@ -170,9 +170,7 @@ def _prompt_model_selection(
|
|
|
170
170
|
Input like '1:int8,3' = model 1 as int8, model 3 as default quant.
|
|
171
171
|
"""
|
|
172
172
|
if accept_defaults:
|
|
173
|
-
return [
|
|
174
|
-
(i, s.model.quant) for i, s in enumerate(scored) if s.is_recommended
|
|
175
|
-
]
|
|
173
|
+
return [(i, s.model.quant) for i, s in enumerate(scored) if s.is_recommended]
|
|
176
174
|
|
|
177
175
|
out.print()
|
|
178
176
|
raw = click.prompt(
|
|
@@ -184,9 +182,7 @@ def _prompt_model_selection(
|
|
|
184
182
|
|
|
185
183
|
if not raw.strip():
|
|
186
184
|
# Accept defaults
|
|
187
|
-
return [
|
|
188
|
-
(i, s.model.quant) for i, s in enumerate(scored) if s.is_recommended
|
|
189
|
-
]
|
|
185
|
+
return [(i, s.model.quant) for i, s in enumerate(scored) if s.is_recommended]
|
|
190
186
|
|
|
191
187
|
# Parse input
|
|
192
188
|
selections: list[tuple[int, str]] = []
|
|
@@ -245,8 +241,8 @@ def _display_final_status(tiers: list[Any], litellm_port: int) -> None:
|
|
|
245
241
|
out.print(
|
|
246
242
|
f" curl http://localhost:{litellm_port}/v1/chat/completions \\\n"
|
|
247
243
|
f" -H 'Content-Type: application/json' \\\n"
|
|
248
|
-
f
|
|
249
|
-
f"
|
|
244
|
+
f' -d \'{{"model":"{tiers[0].tier_name}",'
|
|
245
|
+
f'"messages":[{{"role":"user","content":"Hello!"}}]}}\''
|
|
250
246
|
)
|
|
251
247
|
out.print()
|
|
252
248
|
out.print(" [dim]Manage your stack:[/dim]")
|
|
@@ -343,10 +339,7 @@ def setup(
|
|
|
343
339
|
raise SystemExit(1) from None
|
|
344
340
|
|
|
345
341
|
if not all_models:
|
|
346
|
-
console.print(
|
|
347
|
-
"[bold red]Error:[/bold red] No models found. "
|
|
348
|
-
"Check your network connection."
|
|
349
|
-
)
|
|
342
|
+
console.print("[bold red]Error:[/bold red] No models found. Check your network connection.")
|
|
350
343
|
raise SystemExit(1) from None
|
|
351
344
|
|
|
352
345
|
scored = score_and_filter(all_models, intent, budget_gb)
|
|
@@ -400,25 +393,29 @@ def setup(
|
|
|
400
393
|
thinking=s.model.thinking,
|
|
401
394
|
has_benchmark=s.model.has_benchmark,
|
|
402
395
|
)
|
|
403
|
-
selected_models.append(
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
396
|
+
selected_models.append(
|
|
397
|
+
ScoredDiscoveredModel(
|
|
398
|
+
model=new_model,
|
|
399
|
+
composite_score=s.composite_score,
|
|
400
|
+
speed_score=s.speed_score,
|
|
401
|
+
quality_score=s.quality_score,
|
|
402
|
+
tool_calling_score=s.tool_calling_score,
|
|
403
|
+
memory_efficiency_score=s.memory_efficiency_score,
|
|
404
|
+
is_recommended=True,
|
|
405
|
+
)
|
|
406
|
+
)
|
|
412
407
|
else:
|
|
413
|
-
selected_models.append(
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
408
|
+
selected_models.append(
|
|
409
|
+
ScoredDiscoveredModel(
|
|
410
|
+
model=s.model,
|
|
411
|
+
composite_score=s.composite_score,
|
|
412
|
+
speed_score=s.speed_score,
|
|
413
|
+
quality_score=s.quality_score,
|
|
414
|
+
tool_calling_score=s.tool_calling_score,
|
|
415
|
+
memory_efficiency_score=s.memory_efficiency_score,
|
|
416
|
+
is_recommended=True,
|
|
417
|
+
)
|
|
418
|
+
)
|
|
422
419
|
|
|
423
420
|
# ── Step 4: Tier assignment ──────────────────────────────────────────
|
|
424
421
|
tiers = assign_tiers(selected_models)
|
|
@@ -443,7 +440,7 @@ def setup(
|
|
|
443
440
|
raise SystemExit(0) from None
|
|
444
441
|
|
|
445
442
|
try:
|
|
446
|
-
stack_path,
|
|
443
|
+
stack_path, _litellm_path = generate_config(
|
|
447
444
|
profile=profile,
|
|
448
445
|
intent=intent,
|
|
449
446
|
tier_mappings=tiers,
|
|
@@ -458,7 +455,7 @@ def setup(
|
|
|
458
455
|
out.print(" " + "─" * 40)
|
|
459
456
|
|
|
460
457
|
models_to_pull = [t.model for t in tiers]
|
|
461
|
-
for i,
|
|
458
|
+
for i, _model in enumerate(models_to_pull, 1):
|
|
462
459
|
out.print(f" [bold][{i}/{len(models_to_pull)}][/bold]", end=" ")
|
|
463
460
|
|
|
464
461
|
try:
|
|
@@ -55,9 +55,7 @@ def _display_summary(result: UpResult) -> None:
|
|
|
55
55
|
out.print()
|
|
56
56
|
|
|
57
57
|
if result.already_running:
|
|
58
|
-
out.print(
|
|
59
|
-
Text("All services are already running.", style="bold yellow")
|
|
60
|
-
)
|
|
58
|
+
out.print(Text("All services are already running.", style="bold yellow"))
|
|
61
59
|
out.print()
|
|
62
60
|
|
|
63
61
|
# Warnings
|
|
@@ -115,14 +113,10 @@ def _display_summary(result: UpResult) -> None:
|
|
|
115
113
|
out.print()
|
|
116
114
|
|
|
117
115
|
# Next steps for healthy stacks
|
|
118
|
-
any_healthy = any(
|
|
119
|
-
t.status in ("healthy", "already-running") for t in result.tiers
|
|
120
|
-
)
|
|
116
|
+
any_healthy = any(t.status in ("healthy", "already-running") for t in result.tiers)
|
|
121
117
|
if any_healthy:
|
|
122
118
|
litellm_port = result.litellm.port if result.litellm else 4000
|
|
123
|
-
out.print(
|
|
124
|
-
f"[dim]Endpoint: http://localhost:{litellm_port}/v1[/dim]"
|
|
125
|
-
)
|
|
119
|
+
out.print(f"[dim]Endpoint: http://localhost:{litellm_port}/v1[/dim]")
|
|
126
120
|
out.print()
|
|
127
121
|
|
|
128
122
|
|
|
@@ -156,8 +150,6 @@ def up(dry_run: bool, tier_filter: str | None) -> None:
|
|
|
156
150
|
_display_summary(result)
|
|
157
151
|
|
|
158
152
|
# Exit with non-zero if all tiers failed
|
|
159
|
-
any_success = any(
|
|
160
|
-
t.status in ("healthy", "already-running", "dry-run") for t in result.tiers
|
|
161
|
-
)
|
|
153
|
+
any_success = any(t.status in ("healthy", "already-running", "dry-run") for t in result.tiers)
|
|
162
154
|
if not any_success and not result.dry_run:
|
|
163
155
|
raise SystemExit(1)
|
|
@@ -7,7 +7,7 @@ crashed services, detects flapping, and triggers log rotation.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import sys
|
|
10
|
-
from datetime import
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
11
|
|
|
12
12
|
import click
|
|
13
13
|
from rich.console import Console
|
|
@@ -53,7 +53,7 @@ def _format_status_table(result: PollResult, state: WatchdogState) -> None:
|
|
|
53
53
|
state: Current watchdog state.
|
|
54
54
|
"""
|
|
55
55
|
out = Console()
|
|
56
|
-
now = datetime.now(tz=
|
|
56
|
+
now = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
57
57
|
|
|
58
58
|
out.print()
|
|
59
59
|
out.print(Text(f"[Cycle {state.cycle_count}] {now}", style="bold cyan"))
|
|
@@ -97,9 +97,7 @@ def _format_status_table(result: PollResult, state: WatchdogState) -> None:
|
|
|
97
97
|
Text(
|
|
98
98
|
f" Restarts: {result.restarts_succeeded}/{result.restarts_attempted} succeeded",
|
|
99
99
|
style=(
|
|
100
|
-
"yellow"
|
|
101
|
-
if result.restarts_succeeded < result.restarts_attempted
|
|
102
|
-
else "green"
|
|
100
|
+
"yellow" if result.restarts_succeeded < result.restarts_attempted else "green"
|
|
103
101
|
),
|
|
104
102
|
)
|
|
105
103
|
)
|
|
@@ -112,9 +110,7 @@ def _format_restart_event(record: RestartRecord) -> None:
|
|
|
112
110
|
record: The restart record.
|
|
113
111
|
"""
|
|
114
112
|
out = Console()
|
|
115
|
-
ts = datetime.fromtimestamp(record.timestamp, tz=
|
|
116
|
-
"%Y-%m-%d %H:%M:%S UTC"
|
|
117
|
-
)
|
|
113
|
+
ts = datetime.fromtimestamp(record.timestamp, tz=UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
|
|
118
114
|
status = "✓" if record.success else "✗"
|
|
119
115
|
style = "green" if record.success else "red"
|
|
120
116
|
|
|
@@ -151,9 +147,7 @@ def _validate_positive_int(
|
|
|
151
147
|
click.BadParameter: If value is not positive.
|
|
152
148
|
"""
|
|
153
149
|
if value < 1:
|
|
154
|
-
raise click.BadParameter(
|
|
155
|
-
f"Must be a positive integer (got {value})."
|
|
156
|
-
)
|
|
150
|
+
raise click.BadParameter(f"Must be a positive integer (got {value}).")
|
|
157
151
|
return value
|
|
158
152
|
|
|
159
153
|
|
|
@@ -224,9 +218,7 @@ def watch(
|
|
|
224
218
|
"""
|
|
225
219
|
try:
|
|
226
220
|
if daemon:
|
|
227
|
-
console.print(
|
|
228
|
-
Text("Starting watchdog in daemon mode...", style="bold cyan")
|
|
229
|
-
)
|
|
221
|
+
console.print(Text("Starting watchdog in daemon mode...", style="bold cyan"))
|
|
230
222
|
|
|
231
223
|
run_watchdog(
|
|
232
224
|
interval=interval,
|