mlx-stack 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. mlx_stack-0.3.8/.factory/init.sh +7 -0
  2. mlx_stack-0.3.8/.factory/library/architecture.md +80 -0
  3. mlx_stack-0.3.8/.factory/library/environment.md +38 -0
  4. mlx_stack-0.3.8/.factory/library/user-testing.md +57 -0
  5. mlx_stack-0.3.8/.factory/services.yaml +8 -0
  6. mlx_stack-0.3.8/.factory/skills/cli-worker/SKILL.md +145 -0
  7. mlx_stack-0.3.8/.factory/validation/absorb-profile/scrutiny/reviews/absorb-profile-into-status.json +33 -0
  8. mlx_stack-0.3.8/.factory/validation/absorb-profile/scrutiny/reviews/fix-status-estimate-indicator.json +15 -0
  9. mlx_stack-0.3.8/.factory/validation/absorb-profile/scrutiny/synthesis.json +33 -0
  10. mlx_stack-0.3.8/.factory/validation/absorb-profile/scrutiny/synthesis.round1.json +53 -0
  11. mlx_stack-0.3.8/.factory/validation/absorb-profile/user-testing/flows/status-core-cli.json +106 -0
  12. mlx_stack-0.3.8/.factory/validation/absorb-profile/user-testing/flows/status-json-edge.json +122 -0
  13. mlx_stack-0.3.8/.factory/validation/absorb-profile/user-testing/synthesis.json +29 -0
  14. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/scrutiny/reviews/absorb-recommend-into-models.json +22 -0
  15. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/scrutiny/reviews/fix-delete-recommend-file-and-stale-refs.json +15 -0
  16. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/scrutiny/reviews/remove-init-command.json +34 -0
  17. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/scrutiny/synthesis.json +33 -0
  18. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/scrutiny/synthesis.round1.json +46 -0
  19. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/cross-integrity.json +141 -0
  20. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/cross-surface.json +190 -0
  21. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/models-available-help.json +214 -0
  22. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/models-core.json +239 -0
  23. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/models-errors.json +113 -0
  24. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/flows/rerun-failed-assertions.json +50 -0
  25. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/synthesis.json +35 -0
  26. mlx_stack-0.3.8/.factory/validation/absorb-recommend-remove-init/user-testing/synthesis.round1.json +74 -0
  27. mlx_stack-0.3.8/.factory/validation/setup-modification/scrutiny/reviews/fix-setup-catalog-id-and-model-start-behavior.json +21 -0
  28. mlx_stack-0.3.8/.factory/validation/setup-modification/scrutiny/reviews/setup-add-remove-flags.json +39 -0
  29. mlx_stack-0.3.8/.factory/validation/setup-modification/scrutiny/reviews/setup-model-and-control-flags.json +34 -0
  30. mlx_stack-0.3.8/.factory/validation/setup-modification/scrutiny/synthesis.json +45 -0
  31. mlx_stack-0.3.8/.factory/validation/setup-modification/scrutiny/synthesis.round1.json +72 -0
  32. mlx_stack-0.3.8/.factory/validation/setup-modification/user-testing/flows/setup-combined-edge.json +165 -0
  33. mlx_stack-0.3.8/.factory/validation/setup-modification/user-testing/flows/setup-mod-core.json +238 -0
  34. mlx_stack-0.3.8/.factory/validation/setup-modification/user-testing/flows/setup-model-controls.json +181 -0
  35. mlx_stack-0.3.8/.factory/validation/setup-modification/user-testing/flows/setup-output-guidance.json +171 -0
  36. mlx_stack-0.3.8/.factory/validation/setup-modification/user-testing/synthesis.json +75 -0
  37. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/reviews/fix-bench-hf-repo-target-resolution.json +39 -0
  38. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/reviews/fix-bench-service-name-sanitization.json +15 -0
  39. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/reviews/ungate-pull-command.json +34 -0
  40. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/synthesis.json +33 -0
  41. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/synthesis.round1.json +48 -0
  42. mlx_stack-0.3.8/.factory/validation/ungate-pull/scrutiny/synthesis.round2.json +54 -0
  43. mlx_stack-0.3.8/.factory/validation/ungate-pull/user-testing/flows/group-a.json +54 -0
  44. mlx_stack-0.3.8/.factory/validation/ungate-pull/user-testing/flows/group-b.json +53 -0
  45. mlx_stack-0.3.8/.factory/validation/ungate-pull/user-testing/flows/group-c.json +52 -0
  46. mlx_stack-0.3.8/.factory/validation/ungate-pull/user-testing/flows/group-d.json +53 -0
  47. mlx_stack-0.3.8/.factory/validation/ungate-pull/user-testing/synthesis.json +43 -0
  48. mlx_stack-0.3.8/.release-please-manifest.json +3 -0
  49. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/CHANGELOG.md +25 -1
  50. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/PKG-INFO +34 -38
  51. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/README.md +33 -37
  52. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/_version.py +2 -2
  53. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/bench.py +12 -5
  54. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/install.py +1 -1
  55. mlx_stack-0.3.8/src/mlx_stack/cli/main.py +294 -0
  56. mlx_stack-0.3.8/src/mlx_stack/cli/models.py +786 -0
  57. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/pull.py +32 -12
  58. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/setup.py +495 -41
  59. mlx_stack-0.3.8/src/mlx_stack/cli/status.py +190 -0
  60. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/benchmark.py +103 -31
  61. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/hardware.py +2 -1
  62. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/launchd.py +6 -6
  63. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/pull.py +131 -3
  64. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/stack_status.py +2 -2
  65. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/stack_up.py +2 -2
  66. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/watchdog.py +1 -1
  67. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_benchmark.py +295 -0
  68. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli.py +50 -8
  69. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_bench.py +139 -0
  70. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_install.py +2 -2
  71. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_models.py +1222 -2
  72. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_pull.py +563 -0
  73. mlx_stack-0.3.8/tests/unit/test_cli_setup.py +1321 -0
  74. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_status.py +640 -5
  75. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_up.py +3 -3
  76. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_watch.py +1 -1
  77. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cross_area.py +67 -83
  78. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_hardware.py +6 -2
  79. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_launchd.py +1 -1
  80. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_lifecycle_fixes.py +1 -26
  81. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/uv.lock +3 -3
  82. mlx_stack-0.3.6/.factory/init.sh +0 -15
  83. mlx_stack-0.3.6/.factory/library/architecture.md +0 -92
  84. mlx_stack-0.3.6/.factory/library/environment.md +0 -23
  85. mlx_stack-0.3.6/.factory/library/user-testing.md +0 -80
  86. mlx_stack-0.3.6/.factory/services.yaml +0 -9
  87. mlx_stack-0.3.6/.release-please-manifest.json +0 -3
  88. mlx_stack-0.3.6/src/mlx_stack/cli/init.py +0 -176
  89. mlx_stack-0.3.6/src/mlx_stack/cli/main.py +0 -190
  90. mlx_stack-0.3.6/src/mlx_stack/cli/models.py +0 -306
  91. mlx_stack-0.3.6/src/mlx_stack/cli/profile.py +0 -61
  92. mlx_stack-0.3.6/src/mlx_stack/cli/recommend.py +0 -384
  93. mlx_stack-0.3.6/src/mlx_stack/cli/status.py +0 -116
  94. mlx_stack-0.3.6/tests/unit/test_cli_init.py +0 -1317
  95. mlx_stack-0.3.6/tests/unit/test_cli_profile.py +0 -344
  96. mlx_stack-0.3.6/tests/unit/test_cli_recommend.py +0 -1285
  97. mlx_stack-0.3.6/tests/unit/test_cli_setup.py +0 -199
  98. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/settings.json +0 -0
  99. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/skills/cli-feature/SKILL.md +0 -0
  100. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/configuration-management.json +0 -0
  101. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/dependency-management.json +0 -0
  102. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/fix-catalog-errors-and-families.json +0 -0
  103. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/fix-deps-binary-and-ansi.json +0 -0
  104. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/fix-scaffolding-data-home.json +0 -0
  105. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/hardware-detection.json +0 -0
  106. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/model-catalog.json +0 -0
  107. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/reviews/project-scaffolding.json +0 -0
  108. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/synthesis.json +0 -0
  109. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/scrutiny/synthesis.round1.json +0 -0
  110. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/user-testing/flows/foundation-config-basic.json +0 -0
  111. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/user-testing/flows/foundation-config-deps.json +0 -0
  112. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/user-testing/flows/foundation-profile-catalog.json +0 -0
  113. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/user-testing/flows/foundation-setup-profile-core.json +0 -0
  114. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/foundation/user-testing/synthesis.json +0 -0
  115. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/down-command.json +0 -0
  116. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-preflight-and-readonly.json +0 -0
  117. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-process-robustness.json +0 -0
  118. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-typecheck.json +0 -0
  119. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/process-management.json +0 -0
  120. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/status-command.json +0 -0
  121. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/reviews/up-command.json +0 -0
  122. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/synthesis.json +0 -0
  123. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/scrutiny/synthesis.round1.json +0 -0
  124. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g1-deps-up-basics.json +0 -0
  125. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g2-up-startup.json +0 -0
  126. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g3-up-resilience.json +0 -0
  127. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g4-down.json +0 -0
  128. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g5-status.json +0 -0
  129. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r1-g6-cross.json +0 -0
  130. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r2-g1-fixes.json +0 -0
  131. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/flows/r2-g2-cross-blockers.json +0 -0
  132. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/synthesis.json +0 -0
  133. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/lifecycle/user-testing/synthesis.round1.json +0 -0
  134. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/scrutiny/reviews/fix-cross-area-test-rigor.json +0 -0
  135. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/scrutiny/reviews/misc-cross-area-validation.json +0 -0
  136. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/scrutiny/synthesis.json +0 -0
  137. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/scrutiny/synthesis.round1.json +0 -0
  138. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/user-testing/flows/r1-g1-cross-flows.json +0 -0
  139. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/user-testing/flows/r2-g4-cross-port5050.json +0 -0
  140. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/user-testing/synthesis.json +0 -0
  141. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/misc-cross-area/user-testing/synthesis.round1.json +0 -0
  142. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/fix-ops-lint-errors.json +0 -0
  143. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/fix-ops-scrutiny-issues.json +0 -0
  144. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/fix-ops-typecheck-errors.json +0 -0
  145. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/launchd-integration.json +0 -0
  146. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/log-rotation.json +0 -0
  147. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/logs-command.json +0 -0
  148. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/ops-cross-area-validation.json +0 -0
  149. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/reviews/watchdog-command.json +0 -0
  150. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/synthesis.json +0 -0
  151. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/scrutiny/synthesis.round1.json +0 -0
  152. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/flows/g1-log.json +0 -0
  153. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/flows/g2-logs-command.json +0 -0
  154. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/flows/g3-watch.json +0 -0
  155. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/flows/g4-launchd.json +0 -0
  156. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/flows/g5-cross-ops.json +0 -0
  157. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/ops/user-testing/synthesis.json +0 -0
  158. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/reviews/community-docs.json +0 -0
  159. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/reviews/developing-guide.json +0 -0
  160. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/reviews/fix-public-ready-scrutiny.json +0 -0
  161. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/reviews/github-actions-ci.json +0 -0
  162. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/reviews/readme-rewrite.json +0 -0
  163. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/synthesis.json +0 -0
  164. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/public-ready/scrutiny/synthesis.round1.json +0 -0
  165. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/fix-init-and-models-issues.json +0 -0
  166. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/fix-recommendation-scoring-issues.json +0 -0
  167. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/fix-scoring-lint.json +0 -0
  168. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/init-command.json +0 -0
  169. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/models-command.json +0 -0
  170. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/recommend-command.json +0 -0
  171. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/reviews/scoring-engine.json +0 -0
  172. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/synthesis.json +0 -0
  173. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/scrutiny/synthesis.round1.json +0 -0
  174. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g1-recommend-budget-ranking.json +0 -0
  175. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g2-recommend-output-integration.json +0 -0
  176. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g3-init-core-routing.json +0 -0
  177. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g4-init-cloud-overwrite.json +0 -0
  178. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g5-init-hardware-summary.json +0 -0
  179. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g6-models-local.json +0 -0
  180. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/g7-models-catalog.json +0 -0
  181. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/r2-g1-recommend.json +0 -0
  182. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/r2-g2-models-catalog-filters.json +0 -0
  183. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/flows/r2-g3-cross-012.json +0 -0
  184. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/synthesis.json +0 -0
  185. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/recommendation/user-testing/synthesis.round1.json +0 -0
  186. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/scrutiny/reviews/bench-command.json +0 -0
  187. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/scrutiny/reviews/fix-tooling-scrutiny-issues.json +0 -0
  188. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/scrutiny/reviews/pull-command.json +0 -0
  189. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/scrutiny/synthesis.json +0 -0
  190. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/scrutiny/synthesis.round1.json +0 -0
  191. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/g1-pull-core.json +0 -0
  192. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/g2-pull-errors.json +0 -0
  193. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/g3-bench-core.json +0 -0
  194. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/g4-bench-advanced.json +0 -0
  195. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/r2-g1-pull.json +0 -0
  196. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/r2-g2-bench.json +0 -0
  197. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/r3-g1-pull.json +0 -0
  198. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/r3-g2-bench.json +0 -0
  199. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/flows/r4-g1-bench.json +0 -0
  200. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/synthesis.json +0 -0
  201. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/synthesis.round1.json +0 -0
  202. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/synthesis.round2.json +0 -0
  203. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.factory/validation/tooling/user-testing/synthesis.round3.json +0 -0
  204. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/release.yml +0 -0
  205. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/workflows/ci.yml +0 -0
  206. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/workflows/integration-nightly.yml +0 -0
  207. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/workflows/integration-prerelease.yml +0 -0
  208. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/workflows/publish.yml +0 -0
  209. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.github/workflows/release-please.yml +0 -0
  210. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/.gitignore +0 -0
  211. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/CODE_OF_CONDUCT.md +0 -0
  212. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/CONTRIBUTING.md +0 -0
  213. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/DEVELOPING.md +0 -0
  214. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/LICENSE +0 -0
  215. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/Makefile +0 -0
  216. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/SECURITY.md +0 -0
  217. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/pyproject.toml +0 -0
  218. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/release-please-config.json +0 -0
  219. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/__init__.py +0 -0
  220. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/__init__.py +0 -0
  221. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/config.py +0 -0
  222. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/down.py +0 -0
  223. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/logs.py +0 -0
  224. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/up.py +0 -0
  225. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/cli/watch.py +0 -0
  226. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/__init__.py +0 -0
  227. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/catalog.py +0 -0
  228. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/config.py +0 -0
  229. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/deps.py +0 -0
  230. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/discovery.py +0 -0
  231. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/litellm_gen.py +0 -0
  232. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/log_rotation.py +0 -0
  233. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/log_viewer.py +0 -0
  234. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/models.py +0 -0
  235. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/onboarding.py +0 -0
  236. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/paths.py +0 -0
  237. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/process.py +0 -0
  238. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/scoring.py +0 -0
  239. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/stack_down.py +0 -0
  240. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/core/stack_init.py +0 -0
  241. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/__init__.py +0 -0
  242. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/benchmark_data.json +0 -0
  243. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/__init__.py +0 -0
  244. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/deepseek-r1-32b.yaml +0 -0
  245. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/deepseek-r1-8b.yaml +0 -0
  246. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/gemma3-12b.yaml +0 -0
  247. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/gemma3-27b.yaml +0 -0
  248. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/gemma3-4b.yaml +0 -0
  249. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/llama3.3-8b.yaml +0 -0
  250. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/nemotron-49b.yaml +0 -0
  251. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/nemotron-8b.yaml +0 -0
  252. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3-8b.yaml +0 -0
  253. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-0.8b.yaml +0 -0
  254. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-14b.yaml +0 -0
  255. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-32b.yaml +0 -0
  256. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-3b.yaml +0 -0
  257. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-72b.yaml +0 -0
  258. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/data/catalog/qwen3.5-8b.yaml +0 -0
  259. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/py.typed +0 -0
  260. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/src/mlx_stack/utils/__init__.py +0 -0
  261. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/__init__.py +0 -0
  262. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/conftest.py +0 -0
  263. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/factories.py +0 -0
  264. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/fakes.py +0 -0
  265. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/__init__.py +0 -0
  266. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/conftest.py +0 -0
  267. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/report.py +0 -0
  268. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_catalog_validation.py +0 -0
  269. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_harness_compatibility.py +0 -0
  270. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_inference_e2e.py +0 -0
  271. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_launchd_e2e.py +0 -0
  272. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_model_smoke.py +0 -0
  273. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/integration/test_stack_integration.py +0 -0
  274. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/__init__.py +0 -0
  275. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/conftest.py +0 -0
  276. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_catalog.py +0 -0
  277. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_config.py +0 -0
  278. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_down.py +0 -0
  279. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_cli_logs.py +0 -0
  280. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_config.py +0 -0
  281. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_data_dir.py +0 -0
  282. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_deps.py +0 -0
  283. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_discovery.py +0 -0
  284. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_litellm_gen.py +0 -0
  285. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_log_rotation.py +0 -0
  286. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_log_viewer.py +0 -0
  287. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_models.py +0 -0
  288. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_onboarding.py +0 -0
  289. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_ops_cross_area.py +0 -0
  290. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_paths.py +0 -0
  291. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_process.py +0 -0
  292. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_robustness_fixes.py +0 -0
  293. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_scoring.py +0 -0
  294. {mlx_stack-0.3.6 → mlx_stack-0.3.8}/tests/unit/test_watchdog.py +0 -0
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd /Users/weae1504/Projects/mlx-stack
5
+
6
+ # Install dev dependencies (idempotent)
7
+ uv sync --dev
@@ -0,0 +1,80 @@
1
+ # Architecture
2
+
3
+ How the mlx-stack system works at a high level.
4
+
5
+ ## Overview
6
+
7
+ mlx-stack is a CLI tool that manages local LLM infrastructure on Apple Silicon. It orchestrates vllm-mlx model servers behind a LiteLLM proxy, providing a unified OpenAI-compatible API endpoint.
8
+
9
+ ## Layers
10
+
11
+ ```
12
+ CLI Layer (src/mlx_stack/cli/)
13
+ ├── Commands: setup, up, down, status, models, pull, bench, logs, config, watch, install, uninstall
14
+ └── Each command is a Click command registered in main.py
15
+
16
+ Core Layer (src/mlx_stack/core/)
17
+ ├── hardware.py — Apple Silicon detection (chip, GPU cores, memory, bandwidth)
18
+ ├── catalog.py — YAML catalog loading, validation, querying (15 curated models)
19
+ ├── discovery.py — Live HuggingFace API query for mlx-community models
20
+ ├── scoring.py — Hardware-aware model recommendation engine
21
+ ├── onboarding.py — Setup wizard orchestration (scoring variant for DiscoveredModel)
22
+ ├── stack_init.py — Stack definition generation (stack.yaml + litellm.yaml)
23
+ ├── litellm_gen.py — LiteLLM proxy config generation
24
+ ├── stack_up.py — Process management (start/stop vllm-mlx + LiteLLM)
25
+ ├── pull.py — Model download (HuggingFace snapshot_download)
26
+ ├── benchmark.py — Performance benchmarking
27
+ ├── watchdog.py — Health monitoring + auto-restart
28
+ ├── launchd.py — macOS LaunchAgent management
29
+ ├── config.py — User config (~/.mlx-stack/config.yaml)
30
+ ├── paths.py — Path resolution for data/config/stacks
31
+ └── process.py — Low-level process management
32
+
33
+ Data Layer (src/mlx_stack/data/)
34
+ ├── catalog/*.yaml — Curated model entries (15 files)
35
+ └── benchmark_data.json — Static performance overlay from mlx_transformers_benchmark
36
+ ```
37
+
38
+ ## Data Flow
39
+
40
+ 1. **Hardware detection** → `HardwareProfile` (chip, memory, bandwidth, GPU cores)
41
+ 2. **Model discovery** → `CatalogEntry` (from YAML catalog) or `DiscoveredModel` (from HF API)
42
+ 3. **Scoring** → `ScoredModel` / `ScoredDiscoveredModel` with composite scores
43
+ 4. **Tier assignment** → `TierAssignment` (model → tier name mapping)
44
+ 5. **Config generation** → `stack.yaml` (tier definitions) + `litellm.yaml` (proxy config)
45
+ 6. **Process management** → vllm-mlx subprocesses + LiteLLM proxy process
46
+
47
+ ## Stack Tier Field Semantics
48
+
49
+ - `stack.yaml` tier objects use these fields:
50
+   - `name`: tier identifier (e.g., `standard`, `fast`, `reasoning`)
51
+   - `model`: canonical model identifier used by mlx-stack logic
52
+   - `source`: concrete model source (e.g., the resolved Hugging Face repo) used at runtime and for download
53
+ - For catalog-backed tiers, keep `model` as the catalog model ID (for example `qwen3.5-8b`) rather than a display label, and keep the resolved Hugging Face repo in `source`.
54
+
55
+ ## Key Files for This Mission
56
+
57
+ - `cli/main.py` — Command registration, `_COMMAND_CATEGORIES`, welcome screen, help formatting
58
+ - `cli/pull.py` — Pull command (being ungated to accept HF repos)
59
+ - `cli/status.py` — Status command (absorbing hardware display from profile)
60
+ - `cli/models.py` — Models command (absorbing recommend functionality)
61
+ - `cli/setup.py` — Setup command (gaining modification flags)
62
+ - `cli/profile.py` — Being DELETED
63
+ - `cli/recommend.py` — Being DELETED
64
+ - `cli/init.py` — Being DELETED
65
+ - `core/pull.py` — Download infrastructure (already accepts arbitrary HF repos)
66
+ - `core/stack_init.py` — Config generation (preserved for internal use by setup)
67
+ - `core/onboarding.py` — Setup wizard orchestration
68
+
69
+ ## Testing Patterns
70
+
71
+ - All CLI tests use Click's `CliRunner().invoke(cli, ["command", ...])`
72
+ - Core functions mocked via `@patch("mlx_stack.core.module.function")` or `monkeypatch.setattr`
73
+ - `FakeServiceLayer` test double for stack_up/watchdog tests
74
+ - Test factories in `tests/factories.py` for creating test data
75
+ - No real HF downloads, no real hardware detection in unit tests
76
+
77
+ ## Operational Constraint: Service Name Safety
78
+
79
+ - Service names are reused as PID/log filename stems by `core/process.py` (`pid_file` and log path construction).
80
+ - Any dynamically generated `service_name` must be filesystem-safe (no path separators such as `/`); otherwise startup of temporary processes can fail before health checks run.
@@ -0,0 +1,38 @@
1
+ # Environment
2
+
3
+ Environment variables, external dependencies, and setup notes.
4
+
5
+ **What belongs here:** Required env vars, external API keys/services, dependency quirks, platform-specific notes.
6
+ **What does NOT belong here:** Service ports/commands (use `.factory/services.yaml`).
7
+
8
+ ---
9
+
10
+ ## Python Environment
11
+
12
+ - Python 3.14+ via `uv`
13
+ - All dependencies managed by `uv sync --dev`
14
+ - Virtual environment at `.venv/` (created by uv)
15
+
16
+ ## Key Dependencies
17
+
18
+ - `click` — CLI framework
19
+ - `rich` — Terminal UI (tables, colors, progress)
20
+ - `pyyaml` — YAML parsing
21
+ - `huggingface_hub` — HF API + model downloads
22
+ - `pytest` + `pytest-cov` — Testing
23
+ - `ruff` — Linting
24
+ - `pyright` — Type checking
25
+
26
+ ## Environment Variables
27
+
28
+ - `MLX_STACK_HOME` — Override data directory (default: `~/.mlx-stack/`). Used extensively in tests via `mlx_stack_home` fixture.
29
+
30
+ ## Data Directories
31
+
32
+ - `~/.mlx-stack/` — User data home
33
+ - `~/.mlx-stack/stacks/default.yaml` — Stack definition
34
+ - `~/.mlx-stack/litellm.yaml` — LiteLLM proxy config
35
+ - `~/.mlx-stack/profile.json` — Hardware profile
36
+ - `~/.mlx-stack/config.yaml` — User configuration
37
+ - `~/.mlx-stack/models/` — Downloaded model files
38
+ - `~/.mlx-stack/benchmarks/` — Saved benchmark results
@@ -0,0 +1,57 @@
1
+ # User Testing
2
+
3
+ Testing surface, required tools, and validation approach.
4
+
5
+ ## Validation Surface
6
+
7
+ **Primary surface:** CLI commands via pytest CliRunner (unit-level) and shell invocation (smoke-level).
8
+
9
+ This is a CLI-only mission with no browser UI, no running services, and no external API dependencies during testing. All HuggingFace API calls and model downloads are mocked in tests.
10
+
11
+ ### Tools
12
+
13
+ - **pytest** with Click's `CliRunner` — primary test executor
14
+ - **Shell invocation** — for smoke tests that verify real subprocess CLI behavior
15
+ - **pyright** — type checking gate
16
+ - **ruff** — linting gate
17
+
18
+ ### Test Commands
19
+
20
+ ```bash
21
+ uv run pytest --cov=src/mlx_stack -x -q --tb=short # unit tests
22
+ uv run python -m pyright # type check
23
+ uv run ruff check src/ tests/ # lint
24
+ ```
25
+
26
+ ## Validation Concurrency
27
+
28
+ **Max concurrent validators: 5**
29
+
30
+ Rationale: CLI tests are lightweight (no browser, no services). Each pytest invocation uses ~100MB of RAM, and the machine has 128GB of RAM and 18 CPU cores, so even 5 concurrent test runs would use <1GB in total. There is no infrastructure contention.
31
+
32
+ ## Testing Patterns
33
+
34
+ - CLI commands tested via `CliRunner().invoke(cli, ["command", "--flag", "arg"])`
35
+ - Exit codes checked: 0 for success, non-zero for errors
36
+ - Output checked via `result.output` string matching
37
+ - Side effects verified via mock assertions (`mock_download.assert_called_once()`, etc.)
38
+ - File system effects checked via `tmp_path` fixtures
39
+ - Test factories in `tests/factories.py` for creating test data consistently
40
+
41
+ ## Flow Validator Guidance: CLI
42
+
43
+ - Surface is CLI-only; do not use browser automation.
44
+ - Stay within repository-local and mission-local paths only:
45
+ - Repo: `/Users/weae1504/Projects/mlx-stack`
46
+ - Mission evidence: `/Users/weae1504/.factory/missions/7fc62a3d-138f-4cd2-a601-3f6d1b174b53/evidence/ungate-pull/<group-id>/`
47
+ - Prefer assertion-targeted checks first (specific pytest tests and direct CLI invocations), then add broader checks only when needed to disambiguate failures.
48
+ - Do not edit source code while validating; only create report/evidence artifacts requested for user-testing flows.
49
+ - If any assertion is blocked by environment/tooling, capture exact blocking command output and mark as blocked rather than guessing.
50
+
51
+ ## Validation Notes: absorb-recommend-remove-init
52
+
53
+ - `mlx-stack models --available` currently degrades to a catalog-backed fallback when the HF API is unreachable and still exits `0`. To reproduce the outage path in validation, set:
54
+ - `HTTP_PROXY=http://127.0.0.1:9 HTTPS_PROXY=http://127.0.0.1:9 ALL_PROXY=http://127.0.0.1:9 NO_PROXY=`
55
+ - or `HF_ENDPOINT=http://127.0.0.1:9`
56
+ - `mlx-stack pull <model> --bench` now prints generic saved-results scoring guidance
57
+ without referencing removed commands (`recommend`/`init`).
@@ -0,0 +1,8 @@
1
+ commands:
2
+ install: uv sync --dev
3
+ test: uv run pytest --cov=src/mlx_stack -x -q --tb=short
4
+ typecheck: uv run python -m pyright
5
+ lint: uv run ruff check src/ tests/
6
+ check: uv run ruff check src/ tests/ && uv run python -m pyright && uv run pytest --cov=src/mlx_stack -x -q --tb=short
7
+
8
+ services: {}
@@ -0,0 +1,145 @@
1
+ ---
2
+ name: cli-worker
3
+ description: Implements CLI command changes, module refactoring, and test updates for mlx-stack
4
+ ---
5
+
6
+ # CLI Worker
7
+
8
+ NOTE: Startup and cleanup are handled by `worker-base`. This skill defines the WORK PROCEDURE.
9
+
10
+ ## When to Use This Skill
11
+
12
+ Use for features that involve:
13
+ - Adding, removing, or modifying Click CLI commands
14
+ - Updating command registration in `main.py`
15
+ - Modifying core modules called by CLI commands
16
+ - Writing or rewriting pytest unit tests for CLI commands
17
+ - Updating help text, command categories, error messages
18
+
19
+ ## Required Skills
20
+
21
+ None — all work uses standard file editing and shell commands (pytest, pyright, ruff).
22
+
23
+ ## Work Procedure
24
+
25
+ ### Step 1: Understand the Feature
26
+
27
+ Read the feature description, preconditions, expectedBehavior, and verificationSteps carefully. Read AGENTS.md for conventions and boundaries. Read `.factory/library/architecture.md` for system structure.
28
+
29
+ ### Step 2: Read Affected Files
30
+
31
+ Before writing any code, read ALL files that will be affected:
32
+ - The CLI command file(s) being changed
33
+ - The core module(s) being called
34
+ - The test file(s) being updated
35
+ - `cli/main.py` if command registration changes
36
+ - Any test files that import from affected modules
37
+
38
+ Understand the existing patterns, mock strategies, and test structure.
39
+
40
+ ### Step 3: Write Tests First (TDD)
41
+
42
+ Write failing tests BEFORE implementing changes:
43
+ 1. Create or update the test file with new test cases
44
+ 2. Run `uv run pytest tests/unit/<test_file> -x -q --tb=short` to confirm tests fail (red)
45
+ 3. Each test should test ONE specific behavior from the feature's expectedBehavior
46
+
47
+ Test patterns to follow:
48
+ ```python
49
+ from click.testing import CliRunner
50
+ from mlx_stack.cli.main import cli
51
+
52
+ def test_example(mlx_stack_home):
53
+ runner = CliRunner()
54
+ with patch("mlx_stack.core.module.function") as mock_fn:
55
+ result = runner.invoke(cli, ["command", "--flag", "arg"])
56
+ assert result.exit_code == 0
57
+ assert "expected output" in result.output
58
+ mock_fn.assert_called_once_with(...)
59
+ ```
60
+
61
+ ### Step 4: Implement Changes
62
+
63
+ Make the minimum changes needed to make all tests pass:
64
+ 1. Modify CLI command files
65
+ 2. Modify core modules if needed
66
+ 3. Update `cli/main.py` command registration if needed
67
+
68
+ Follow existing patterns:
69
+ - Use `console = Console(stderr=True)` for errors, `out = Console()` for output
70
+ - Catch domain exceptions, print user-friendly errors, `raise SystemExit(1)`
71
+ - Use absolute imports: `from mlx_stack.core.module import Class`
72
+
73
+ ### Step 5: Run Tests (Green)
74
+
75
+ 1. Run the specific test file: `uv run pytest tests/unit/<test_file> -x -q --tb=short`
76
+ 2. Run the FULL test suite: `uv run pytest --cov=src/mlx_stack -x -q --tb=short`
77
+ 3. Fix any failures in other test files caused by your changes
78
+
79
+ ### Step 6: Run Validators
80
+
81
+ 1. Type check: `uv run python -m pyright`
82
+ 2. Lint: `uv run ruff check src/ tests/`
83
+ 3. Fix any issues
84
+
85
+ ### Step 7: Verify Manually
86
+
87
+ For each changed command, run a quick manual check:
88
+ ```bash
89
+ uv run mlx-stack --help # verify help output
90
+ uv run mlx-stack <command> --help # verify command help
91
+ ```
92
+
93
+ If the feature removes a command, verify it's gone:
94
+ ```bash
95
+ uv run mlx-stack <removed-command> # should fail with "No such command" (exit code 2)
96
+ ```
97
+
98
+ ### Step 8: Clean Up
99
+
100
+ - Remove any deleted test files from disk
101
+ - Remove any deleted CLI command files from disk
102
+ - Ensure no orphaned imports remain
103
+ - Run the full test suite one final time
104
+
105
+ ## Example Handoff
106
+
107
+ ```json
108
+ {
109
+ "salientSummary": "Ungated pull command to accept HF repo strings. Added slash-based routing (contains '/' = HF repo, no '/' = catalog ID). Wrote 12 new tests in test_cli_pull.py covering HF repo acceptance, error handling, and flag combinations. All 1400+ tests pass, pyright clean, ruff clean.",
110
+ "whatWasImplemented": "Modified cli/pull.py to detect HF repo strings (containing '/') and bypass catalog lookup, routing directly to download_model(). Updated core/pull.py pull_model() to accept hf_repo_override parameter. Updated help text to document both input types. Added 12 new test cases and updated 3 existing tests.",
111
+ "whatWasLeftUndone": "",
112
+ "verification": {
113
+ "commandsRun": [
114
+ { "command": "uv run pytest tests/unit/test_cli_pull.py -x -q --tb=short", "exitCode": 0, "observation": "77 passed (12 new + 65 existing)" },
115
+ { "command": "uv run pytest --cov=src/mlx_stack -x -q --tb=short", "exitCode": 0, "observation": "1412 passed, 0 failed" },
116
+ { "command": "uv run python -m pyright", "exitCode": 0, "observation": "0 errors, 0 warnings" },
117
+ { "command": "uv run ruff check src/ tests/", "exitCode": 0, "observation": "All checks passed" },
118
+ { "command": "uv run mlx-stack pull --help", "exitCode": 0, "observation": "Help text mentions HF repo and catalog ID" }
119
+ ],
120
+ "interactiveChecks": [
121
+ { "action": "Ran 'uv run mlx-stack pull --help'", "observed": "Help text now says 'MODEL is a catalog model ID (e.g., qwen3.5-8b) or HuggingFace repo (e.g., mlx-community/Phi-5-Mini-4bit)'" }
122
+ ]
123
+ },
124
+ "tests": {
125
+ "added": [
126
+ {
127
+ "file": "tests/unit/test_cli_pull.py",
128
+ "cases": [
129
+ { "name": "test_pull_hf_repo_downloads_directly", "verifies": "HF repo string bypasses catalog lookup" },
130
+ { "name": "test_pull_hf_repo_with_quant_stores_metadata", "verifies": "--quant flag stores metadata for HF repo" },
131
+ { "name": "test_pull_hf_repo_nonexistent_shows_error", "verifies": "Invalid HF repo shows user-friendly error" }
132
+ ]
133
+ }
134
+ ]
135
+ },
136
+ "discoveredIssues": []
137
+ }
138
+ ```
139
+
140
+ ## When to Return to Orchestrator
141
+
142
+ - Feature depends on changes that haven't been made yet (e.g., needs a core module that another feature creates)
143
+ - Test failures in unrelated areas that can't be resolved without understanding broader context
144
+ - Ambiguity in feature requirements that can't be resolved from AGENTS.md or feature description
145
+ - A boundary violation would be needed to complete the feature (e.g., need to change scoring.py)
@@ -0,0 +1,33 @@
1
+ {
2
+ "featureId": "absorb-profile-into-status",
3
+ "reviewedAt": "2026-04-04T20:13:14Z",
4
+ "commitId": "f684034",
5
+ "transcriptSkeletonReviewed": true,
6
+ "diffReviewed": true,
7
+ "status": "fail",
8
+ "codeReview": {
9
+ "summary": "The profile command removal and status hardware integration are mostly correct, but VAL-STATUS-008 is not met in real usage: estimated bandwidth is never marked as estimated when status reads persisted profile.json data.",
10
+ "issues": [
11
+ {
12
+ "file": "src/mlx_stack/core/hardware.py",
13
+ "line": 301,
14
+ "severity": "blocking",
15
+ "description": "load_profile() always reconstructs HardwareProfile with is_estimate=False, so status cannot emit '(estimate)' for unknown chips from saved profile data. status.py only adds the indicator when hw.is_estimate is true (src/mlx_stack/cli/status.py:72), which makes VAL-STATUS-008 fail for persisted profiles."
16
+ }
17
+ ]
18
+ },
19
+ "sharedStateObservations": [
20
+ {
21
+ "area": "conventions",
22
+ "observation": "Mission guidance and validation contract are misaligned for estimate handling: AGENTS.md says core/hardware.py must remain unchanged, but VAL-STATUS-008 expects estimate signaling that the current load_profile format/path does not preserve.",
23
+ "evidence": "AGENTS.md:14 mandates core/hardware.py unchanged; validation-contract.md:317-318 requires estimate indicator; core/hardware.py:301 hardcodes is_estimate=False."
24
+ },
25
+ {
26
+ "area": "skills",
27
+ "observation": "The worker marked skill procedure as followed, but did not follow cli-worker's TDD step ordering (tests-first).",
28
+ "evidence": "cli-worker/SKILL.md:40-45 requires writing failing tests before implementation; transcript skeleton for session 916ec186-7707-4ffc-80b2-72d2134197f5 shows code edits/deletions were performed before the later 'Now I need to update the existing tests and add new ones' step."
29
+ }
30
+ ],
31
+ "addressesFailureFrom": null,
32
+ "summary": "Review result: FAIL. The feature correctly removes `profile` and adds hardware output to `status`, but it does not satisfy VAL-STATUS-008 in real persisted-profile flows because estimate metadata is dropped by load_profile()."
33
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "featureId": "fix-status-estimate-indicator",
3
+ "reviewedAt": "2026-04-04T20:24:27Z",
4
+ "commitId": "a477711",
5
+ "transcriptSkeletonReviewed": true,
6
+ "diffReviewed": true,
7
+ "status": "pass",
8
+ "codeReview": {
9
+ "summary": "The fix directly resolves the prior VAL-STATUS-008 failure by preserving `is_estimate` through profile serialization/deserialization and exposing that metadata in `status --json`. Compared with the original failing commit (`f684034`), which hardcoded `is_estimate=False` in `load_profile()`, commit `a477711` now reads `is_estimate` from profile JSON (with a backward-compatible default), so table output can correctly show `(estimate)` for estimated bandwidth.",
10
+ "issues": []
11
+ },
12
+ "sharedStateObservations": [],
13
+ "addressesFailureFrom": "/Users/weae1504/Projects/mlx-stack/.factory/validation/absorb-profile/scrutiny/reviews/absorb-profile-into-status.json",
14
+ "summary": "Review result: PASS. The fix adequately addresses the original failure by preserving estimate metadata end-to-end and leaving no blocking issues for estimate indication behavior."
15
+ }
@@ -0,0 +1,33 @@
1
+ {
2
+ "milestone": "absorb-profile",
3
+ "round": 2,
4
+ "status": "pass",
5
+ "validatorsRun": {
6
+ "test": {
7
+ "passed": true,
8
+ "command": "uv run pytest --cov=src/mlx_stack -x -q --tb=short",
9
+ "exitCode": 0
10
+ },
11
+ "typecheck": {
12
+ "passed": true,
13
+ "command": "uv run python -m pyright",
14
+ "exitCode": 0
15
+ },
16
+ "lint": {
17
+ "passed": true,
18
+ "command": "uv run ruff check src/ tests/",
19
+ "exitCode": 0
20
+ }
21
+ },
22
+ "reviewsSummary": {
23
+ "total": 1,
24
+ "passed": 1,
25
+ "failed": 0,
26
+ "failedFeatures": []
27
+ },
28
+ "blockingIssues": [],
29
+ "appliedUpdates": [],
30
+ "suggestedGuidanceUpdates": [],
31
+ "rejectedObservations": [],
32
+ "previousRound": ".factory/validation/absorb-profile/scrutiny/synthesis.round1.json"
33
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "milestone": "absorb-profile",
3
+ "round": 1,
4
+ "status": "fail",
5
+ "validatorsRun": {
6
+ "test": {
7
+ "passed": true,
8
+ "command": "uv run pytest --cov=src/mlx_stack -x -q --tb=short",
9
+ "exitCode": 0
10
+ },
11
+ "typecheck": {
12
+ "passed": true,
13
+ "command": "uv run python -m pyright",
14
+ "exitCode": 0
15
+ },
16
+ "lint": {
17
+ "passed": true,
18
+ "command": "uv run ruff check src/ tests/",
19
+ "exitCode": 0
20
+ }
21
+ },
22
+ "reviewsSummary": {
23
+ "total": 1,
24
+ "passed": 0,
25
+ "failed": 1,
26
+ "failedFeatures": [
27
+ "absorb-profile-into-status"
28
+ ]
29
+ },
30
+ "blockingIssues": [
31
+ {
32
+ "featureId": "absorb-profile-into-status",
33
+ "severity": "blocking",
34
+ "description": "VAL-STATUS-008 is not satisfied for persisted profiles: load_profile() reconstructs HardwareProfile with is_estimate=False, so status output cannot display '(estimate)' for unknown chips loaded from profile.json."
35
+ }
36
+ ],
37
+ "appliedUpdates": [],
38
+ "suggestedGuidanceUpdates": [
39
+ {
40
+ "target": "AGENTS.md",
41
+ "suggestion": "Clarify the boundary that core/hardware.py can be minimally updated when required to preserve factual hardware metadata (for example estimate signaling) needed by validation assertions.",
42
+ "evidence": "AGENTS.md currently says core/hardware.py must remain unchanged, while VAL-STATUS-008 requires estimate signaling and review found load_profile() currently drops that metadata.",
43
+ "isSystemic": false
44
+ }
45
+ ],
46
+ "rejectedObservations": [
47
+ {
48
+ "observation": "Feature worker did not follow cli-worker TDD ordering strictly.",
49
+ "reason": "already-documented"
50
+ }
51
+ ],
52
+ "previousRound": null
53
+ }
@@ -0,0 +1,106 @@
1
+ {
2
+ "groupId": "status-core-cli",
3
+ "testedAt": "2026-04-04T20:29:01.801098+00:00",
4
+ "isolation": {
5
+ "surface": "CLI",
6
+ "repoRoot": "/Users/weae1504/Projects/mlx-stack",
7
+ "missionDir": "/Users/weae1504/.factory/missions/7fc62a3d-138f-4cd2-a601-3f6d1b174b53",
8
+ "mlxStackHome": "/Users/weae1504/.factory/missions/7fc62a3d-138f-4cd2-a601-3f6d1b174b53/evidence/absorb-profile/status-core-cli-home"
9
+ },
10
+ "toolsUsed": [
11
+ "shell",
12
+ "uv run mlx-stack"
13
+ ],
14
+ "assertionResults": [
15
+ {
16
+ "id": "VAL-STATUS-001",
17
+ "status": "pass",
18
+ "evidence": {
19
+ "files": [
20
+ "absorb-profile/status-core-cli/VAL-STATUS-001-profile-command.txt"
21
+ ],
22
+ "observed": "exit=2; contains_no_such_command=True; deprecated_present=False"
23
+ }
24
+ },
25
+ {
26
+ "id": "VAL-STATUS-002",
27
+ "status": "pass",
28
+ "evidence": {
29
+ "files": [
30
+ "absorb-profile/status-core-cli/VAL-STATUS-002-main-help.txt"
31
+ ],
32
+ "observed": "profile_listed=False; status_listed=True"
33
+ }
34
+ },
35
+ {
36
+ "id": "VAL-STATUS-003",
37
+ "status": "pass",
38
+ "evidence": {
39
+ "files": [
40
+ "absorb-profile/status-core-cli/VAL-STATUS-003-status-with-profile.txt"
41
+ ],
42
+ "observed": "exit=0; has_chip=True; has_gpu=True; has_memory=True; has_bandwidth=True; has_service_table=True"
43
+ }
44
+ },
45
+ {
46
+ "id": "VAL-STATUS-004",
47
+ "status": "pass",
48
+ "evidence": {
49
+ "files": [
50
+ "absorb-profile/status-core-cli/VAL-STATUS-004-status-without-profile.txt"
51
+ ],
52
+ "observed": "exit=0; traceback_present=False; has_service_table=True; hardware_section_present=False"
53
+ }
54
+ },
55
+ {
56
+ "id": "VAL-STATUS-007",
57
+ "status": "pass",
58
+ "evidence": {
59
+ "files": [
60
+ "absorb-profile/status-core-cli/VAL-STATUS-003-status-with-profile.txt"
61
+ ],
62
+ "observed": "missing_columns=[]"
63
+ }
64
+ },
65
+ {
66
+ "id": "VAL-STATUS-009",
67
+ "status": "pass",
68
+ "evidence": {
69
+ "files": [
70
+ "absorb-profile/status-core-cli/VAL-STATUS-009-status-help.txt"
71
+ ],
72
+ "observed": "exit=0; mentions_hardware_or_chip=True; has_json_flag=True"
73
+ }
74
+ }
75
+ ],
76
+ "commandsRun": [
77
+ {
78
+ "command": "uv run --project /Users/weae1504/Projects/mlx-stack mlx-stack profile",
79
+ "exitCode": 2,
80
+ "keyObservation": "Error: No such command 'profile'."
81
+ },
82
+ {
83
+ "command": "uv run --project /Users/weae1504/Projects/mlx-stack mlx-stack --help",
84
+ "exitCode": 0,
85
+ "keyObservation": "mlx-stack \u2014 CLI control plane for local LLM infrastructure on Apple Silicon"
86
+ },
87
+ {
88
+ "command": "uv run --project /Users/weae1504/Projects/mlx-stack mlx-stack status",
89
+ "exitCode": 0,
90
+ "keyObservation": "Hardware "
91
+ },
92
+ {
93
+ "command": "uv run --project /Users/weae1504/Projects/mlx-stack mlx-stack status",
94
+ "exitCode": 0,
95
+ "keyObservation": "Service Status "
96
+ },
97
+ {
98
+ "command": "uv run --project /Users/weae1504/Projects/mlx-stack mlx-stack status --help",
99
+ "exitCode": 0,
100
+ "keyObservation": "Usage: mlx-stack status [OPTIONS]"
101
+ }
102
+ ],
103
+ "frictions": [],
104
+ "blockers": [],
105
+ "summary": "Tested 6 assertions: 6 passed, 0 failed, 0 blocked."
106
+ }