mlx-stack 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. mlx_stack-0.1.0/.factory/init.sh +15 -0
  2. mlx_stack-0.1.0/.factory/library/architecture.md +92 -0
  3. mlx_stack-0.1.0/.factory/library/environment.md +23 -0
  4. mlx_stack-0.1.0/.factory/library/user-testing.md +80 -0
  5. mlx_stack-0.1.0/.factory/services.yaml +9 -0
  6. mlx_stack-0.1.0/.factory/settings.json +5 -0
  7. mlx_stack-0.1.0/.factory/skills/cli-feature/SKILL.md +350 -0
  8. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/configuration-management.json +33 -0
  9. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/dependency-management.json +50 -0
  10. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/fix-catalog-errors-and-families.json +21 -0
  11. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/fix-deps-binary-and-ansi.json +26 -0
  12. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/fix-scaffolding-data-home.json +26 -0
  13. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/hardware-detection.json +26 -0
  14. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/model-catalog.json +44 -0
  15. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/reviews/project-scaffolding.json +33 -0
  16. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/synthesis.json +56 -0
  17. mlx_stack-0.1.0/.factory/validation/foundation/scrutiny/synthesis.round1.json +89 -0
  18. mlx_stack-0.1.0/.factory/validation/foundation/user-testing/flows/foundation-config-basic.json +144 -0
  19. mlx_stack-0.1.0/.factory/validation/foundation/user-testing/flows/foundation-config-deps.json +198 -0
  20. mlx_stack-0.1.0/.factory/validation/foundation/user-testing/flows/foundation-profile-catalog.json +164 -0
  21. mlx_stack-0.1.0/.factory/validation/foundation/user-testing/flows/foundation-setup-profile-core.json +165 -0
  22. mlx_stack-0.1.0/.factory/validation/foundation/user-testing/synthesis.json +112 -0
  23. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/down-command.json +34 -0
  24. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-preflight-and-readonly.json +26 -0
  25. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-process-robustness.json +28 -0
  26. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/fix-lifecycle-typecheck.json +21 -0
  27. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/process-management.json +40 -0
  28. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/status-command.json +33 -0
  29. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/reviews/up-command.json +34 -0
  30. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/synthesis.json +51 -0
  31. mlx_stack-0.1.0/.factory/validation/lifecycle/scrutiny/synthesis.round1.json +82 -0
  32. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g1-deps-up-basics.json +184 -0
  33. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g2-up-startup.json +142 -0
  34. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g3-up-resilience.json +185 -0
  35. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g4-down.json +172 -0
  36. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g5-status.json +200 -0
  37. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r1-g6-cross.json +219 -0
  38. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r2-g1-fixes.json +107 -0
  39. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/flows/r2-g2-cross-blockers.json +127 -0
  40. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/synthesis.json +88 -0
  41. mlx_stack-0.1.0/.factory/validation/lifecycle/user-testing/synthesis.round1.json +238 -0
  42. mlx_stack-0.1.0/.factory/validation/misc-cross-area/scrutiny/reviews/fix-cross-area-test-rigor.json +21 -0
  43. mlx_stack-0.1.0/.factory/validation/misc-cross-area/scrutiny/reviews/misc-cross-area-validation.json +52 -0
  44. mlx_stack-0.1.0/.factory/validation/misc-cross-area/scrutiny/synthesis.json +38 -0
  45. mlx_stack-0.1.0/.factory/validation/misc-cross-area/scrutiny/synthesis.round1.json +68 -0
  46. mlx_stack-0.1.0/.factory/validation/misc-cross-area/user-testing/flows/r1-g1-cross-flows.json +140 -0
  47. mlx_stack-0.1.0/.factory/validation/misc-cross-area/user-testing/flows/r2-g4-cross-port5050.json +89 -0
  48. mlx_stack-0.1.0/.factory/validation/misc-cross-area/user-testing/synthesis.json +45 -0
  49. mlx_stack-0.1.0/.factory/validation/misc-cross-area/user-testing/synthesis.round1.json +55 -0
  50. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/fix-ops-lint-errors.json +21 -0
  51. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/fix-ops-scrutiny-issues.json +21 -0
  52. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/fix-ops-typecheck-errors.json +21 -0
  53. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/launchd-integration.json +40 -0
  54. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/log-rotation.json +34 -0
  55. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/logs-command.json +39 -0
  56. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/ops-cross-area-validation.json +45 -0
  57. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/reviews/watchdog-command.json +50 -0
  58. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/synthesis.json +38 -0
  59. mlx_stack-0.1.0/.factory/validation/ops/scrutiny/synthesis.round1.json +113 -0
  60. mlx_stack-0.1.0/.factory/validation/ops/user-testing/flows/g1-log.json +172 -0
  61. mlx_stack-0.1.0/.factory/validation/ops/user-testing/flows/g2-logs-command.json +205 -0
  62. mlx_stack-0.1.0/.factory/validation/ops/user-testing/flows/g3-watch.json +242 -0
  63. mlx_stack-0.1.0/.factory/validation/ops/user-testing/flows/g4-launchd.json +150 -0
  64. mlx_stack-0.1.0/.factory/validation/ops/user-testing/flows/g5-cross-ops.json +109 -0
  65. mlx_stack-0.1.0/.factory/validation/ops/user-testing/synthesis.json +58 -0
  66. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/reviews/community-docs.json +28 -0
  67. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/reviews/developing-guide.json +34 -0
  68. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/reviews/fix-public-ready-scrutiny.json +21 -0
  69. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/reviews/github-actions-ci.json +28 -0
  70. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/reviews/readme-rewrite.json +33 -0
  71. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/synthesis.json +38 -0
  72. mlx_stack-0.1.0/.factory/validation/public-ready/scrutiny/synthesis.round1.json +80 -0
  73. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/fix-init-and-models-issues.json +26 -0
  74. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/fix-recommendation-scoring-issues.json +21 -0
  75. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/fix-scoring-lint.json +21 -0
  76. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/init-command.json +39 -0
  77. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/models-command.json +45 -0
  78. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/recommend-command.json +39 -0
  79. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/reviews/scoring-engine.json +39 -0
  80. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/synthesis.json +44 -0
  81. mlx_stack-0.1.0/.factory/validation/recommendation/scrutiny/synthesis.round1.json +108 -0
  82. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g1-recommend-budget-ranking.json +227 -0
  83. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g2-recommend-output-integration.json +284 -0
  84. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g3-init-core-routing.json +191 -0
  85. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g4-init-cloud-overwrite.json +188 -0
  86. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g5-init-hardware-summary.json +131 -0
  87. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g6-models-local.json +131 -0
  88. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/g7-models-catalog.json +174 -0
  89. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/r2-g1-recommend.json +160 -0
  90. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/r2-g2-models-catalog-filters.json +112 -0
  91. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/flows/r2-g3-cross-012.json +101 -0
  92. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/synthesis.json +70 -0
  93. mlx_stack-0.1.0/.factory/validation/recommendation/user-testing/synthesis.round1.json +117 -0
  94. mlx_stack-0.1.0/.factory/validation/tooling/scrutiny/reviews/bench-command.json +40 -0
  95. mlx_stack-0.1.0/.factory/validation/tooling/scrutiny/reviews/fix-tooling-scrutiny-issues.json +56 -0
  96. mlx_stack-0.1.0/.factory/validation/tooling/scrutiny/reviews/pull-command.json +51 -0
  97. mlx_stack-0.1.0/.factory/validation/tooling/scrutiny/synthesis.json +46 -0
  98. mlx_stack-0.1.0/.factory/validation/tooling/scrutiny/synthesis.round1.json +76 -0
  99. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/g1-pull-core.json +98 -0
  100. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/g2-pull-errors.json +91 -0
  101. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/g3-bench-core.json +77 -0
  102. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/g4-bench-advanced.json +68 -0
  103. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/r2-g1-pull.json +75 -0
  104. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/r2-g2-bench.json +111 -0
  105. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/r3-g1-pull.json +51 -0
  106. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/r3-g2-bench.json +55 -0
  107. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/flows/r4-g1-bench.json +55 -0
  108. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/synthesis.json +44 -0
  109. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/synthesis.round1.json +127 -0
  110. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/synthesis.round2.json +81 -0
  111. mlx_stack-0.1.0/.factory/validation/tooling/user-testing/synthesis.round3.json +55 -0
  112. mlx_stack-0.1.0/.github/release.yml +17 -0
  113. mlx_stack-0.1.0/.github/workflows/ci.yml +51 -0
  114. mlx_stack-0.1.0/.github/workflows/publish.yml +71 -0
  115. mlx_stack-0.1.0/.gitignore +36 -0
  116. mlx_stack-0.1.0/CHANGELOG.md +60 -0
  117. mlx_stack-0.1.0/CODE_OF_CONDUCT.md +77 -0
  118. mlx_stack-0.1.0/CONTRIBUTING.md +154 -0
  119. mlx_stack-0.1.0/DEVELOPING.md +820 -0
  120. mlx_stack-0.1.0/LICENSE +21 -0
  121. mlx_stack-0.1.0/PKG-INFO +397 -0
  122. mlx_stack-0.1.0/README.md +368 -0
  123. mlx_stack-0.1.0/SECURITY.md +67 -0
  124. mlx_stack-0.1.0/pyproject.toml +82 -0
  125. mlx_stack-0.1.0/src/mlx_stack/__init__.py +5 -0
  126. mlx_stack-0.1.0/src/mlx_stack/_version.py +24 -0
  127. mlx_stack-0.1.0/src/mlx_stack/cli/__init__.py +5 -0
  128. mlx_stack-0.1.0/src/mlx_stack/cli/bench.py +221 -0
  129. mlx_stack-0.1.0/src/mlx_stack/cli/config.py +166 -0
  130. mlx_stack-0.1.0/src/mlx_stack/cli/down.py +109 -0
  131. mlx_stack-0.1.0/src/mlx_stack/cli/init.py +180 -0
  132. mlx_stack-0.1.0/src/mlx_stack/cli/install.py +165 -0
  133. mlx_stack-0.1.0/src/mlx_stack/cli/logs.py +234 -0
  134. mlx_stack-0.1.0/src/mlx_stack/cli/main.py +187 -0
  135. mlx_stack-0.1.0/src/mlx_stack/cli/models.py +304 -0
  136. mlx_stack-0.1.0/src/mlx_stack/cli/profile.py +65 -0
  137. mlx_stack-0.1.0/src/mlx_stack/cli/pull.py +134 -0
  138. mlx_stack-0.1.0/src/mlx_stack/cli/recommend.py +397 -0
  139. mlx_stack-0.1.0/src/mlx_stack/cli/status.py +111 -0
  140. mlx_stack-0.1.0/src/mlx_stack/cli/up.py +163 -0
  141. mlx_stack-0.1.0/src/mlx_stack/cli/watch.py +252 -0
  142. mlx_stack-0.1.0/src/mlx_stack/core/__init__.py +1 -0
  143. mlx_stack-0.1.0/src/mlx_stack/core/benchmark.py +1182 -0
  144. mlx_stack-0.1.0/src/mlx_stack/core/catalog.py +560 -0
  145. mlx_stack-0.1.0/src/mlx_stack/core/config.py +471 -0
  146. mlx_stack-0.1.0/src/mlx_stack/core/deps.py +323 -0
  147. mlx_stack-0.1.0/src/mlx_stack/core/hardware.py +304 -0
  148. mlx_stack-0.1.0/src/mlx_stack/core/launchd.py +531 -0
  149. mlx_stack-0.1.0/src/mlx_stack/core/litellm_gen.py +188 -0
  150. mlx_stack-0.1.0/src/mlx_stack/core/log_rotation.py +231 -0
  151. mlx_stack-0.1.0/src/mlx_stack/core/log_viewer.py +386 -0
  152. mlx_stack-0.1.0/src/mlx_stack/core/models.py +639 -0
  153. mlx_stack-0.1.0/src/mlx_stack/core/paths.py +79 -0
  154. mlx_stack-0.1.0/src/mlx_stack/core/process.py +887 -0
  155. mlx_stack-0.1.0/src/mlx_stack/core/pull.py +815 -0
  156. mlx_stack-0.1.0/src/mlx_stack/core/scoring.py +611 -0
  157. mlx_stack-0.1.0/src/mlx_stack/core/stack_down.py +317 -0
  158. mlx_stack-0.1.0/src/mlx_stack/core/stack_init.py +524 -0
  159. mlx_stack-0.1.0/src/mlx_stack/core/stack_status.py +229 -0
  160. mlx_stack-0.1.0/src/mlx_stack/core/stack_up.py +856 -0
  161. mlx_stack-0.1.0/src/mlx_stack/core/watchdog.py +744 -0
  162. mlx_stack-0.1.0/src/mlx_stack/data/__init__.py +1 -0
  163. mlx_stack-0.1.0/src/mlx_stack/data/catalog/__init__.py +1 -0
  164. mlx_stack-0.1.0/src/mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  165. mlx_stack-0.1.0/src/mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  166. mlx_stack-0.1.0/src/mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  167. mlx_stack-0.1.0/src/mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  168. mlx_stack-0.1.0/src/mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  169. mlx_stack-0.1.0/src/mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  170. mlx_stack-0.1.0/src/mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  171. mlx_stack-0.1.0/src/mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  172. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  173. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  174. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  175. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  176. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  177. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  178. mlx_stack-0.1.0/src/mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  179. mlx_stack-0.1.0/src/mlx_stack/py.typed +1 -0
  180. mlx_stack-0.1.0/src/mlx_stack/utils/__init__.py +1 -0
  181. mlx_stack-0.1.0/tests/__init__.py +1 -0
  182. mlx_stack-0.1.0/tests/conftest.py +43 -0
  183. mlx_stack-0.1.0/tests/integration/__init__.py +1 -0
  184. mlx_stack-0.1.0/tests/integration/test_inference_e2e.py +394 -0
  185. mlx_stack-0.1.0/tests/integration/test_launchd_e2e.py +260 -0
  186. mlx_stack-0.1.0/tests/unit/__init__.py +1 -0
  187. mlx_stack-0.1.0/tests/unit/test_benchmark.py +1358 -0
  188. mlx_stack-0.1.0/tests/unit/test_catalog.py +795 -0
  189. mlx_stack-0.1.0/tests/unit/test_cli.py +169 -0
  190. mlx_stack-0.1.0/tests/unit/test_cli_bench.py +533 -0
  191. mlx_stack-0.1.0/tests/unit/test_cli_config.py +427 -0
  192. mlx_stack-0.1.0/tests/unit/test_cli_down.py +977 -0
  193. mlx_stack-0.1.0/tests/unit/test_cli_init.py +1194 -0
  194. mlx_stack-0.1.0/tests/unit/test_cli_install.py +387 -0
  195. mlx_stack-0.1.0/tests/unit/test_cli_logs.py +408 -0
  196. mlx_stack-0.1.0/tests/unit/test_cli_models.py +1410 -0
  197. mlx_stack-0.1.0/tests/unit/test_cli_profile.py +400 -0
  198. mlx_stack-0.1.0/tests/unit/test_cli_pull.py +1691 -0
  199. mlx_stack-0.1.0/tests/unit/test_cli_recommend.py +1257 -0
  200. mlx_stack-0.1.0/tests/unit/test_cli_status.py +1288 -0
  201. mlx_stack-0.1.0/tests/unit/test_cli_up.py +1584 -0
  202. mlx_stack-0.1.0/tests/unit/test_cli_watch.py +300 -0
  203. mlx_stack-0.1.0/tests/unit/test_config.py +507 -0
  204. mlx_stack-0.1.0/tests/unit/test_cross_area.py +1352 -0
  205. mlx_stack-0.1.0/tests/unit/test_data_dir.py +47 -0
  206. mlx_stack-0.1.0/tests/unit/test_deps.py +756 -0
  207. mlx_stack-0.1.0/tests/unit/test_hardware.py +492 -0
  208. mlx_stack-0.1.0/tests/unit/test_launchd.py +826 -0
  209. mlx_stack-0.1.0/tests/unit/test_lifecycle_fixes.py +526 -0
  210. mlx_stack-0.1.0/tests/unit/test_litellm_gen.py +304 -0
  211. mlx_stack-0.1.0/tests/unit/test_log_rotation.py +490 -0
  212. mlx_stack-0.1.0/tests/unit/test_log_viewer.py +574 -0
  213. mlx_stack-0.1.0/tests/unit/test_models.py +278 -0
  214. mlx_stack-0.1.0/tests/unit/test_ops_cross_area.py +1468 -0
  215. mlx_stack-0.1.0/tests/unit/test_paths.py +85 -0
  216. mlx_stack-0.1.0/tests/unit/test_process.py +942 -0
  217. mlx_stack-0.1.0/tests/unit/test_robustness_fixes.py +397 -0
  218. mlx_stack-0.1.0/tests/unit/test_scoring.py +1403 -0
  219. mlx_stack-0.1.0/tests/unit/test_watchdog.py +899 -0
  220. mlx_stack-0.1.0/uv.lock +533 -0
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Verify Python version
5
+ python_version=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
6
+ required="3.13"
7
+ if [ "$(printf '%s\n' "$required" "$python_version" | sort -V | head -n1)" != "$required" ]; then
8
+ echo "ERROR: Python >= 3.13 required (found $python_version)"
9
+ exit 1
10
+ fi
11
+
12
+ # Install dependencies if pyproject.toml exists
13
+ if [ -f pyproject.toml ]; then
14
+ uv sync
15
+ fi
@@ -0,0 +1,92 @@
1
+ # Architecture
2
+
3
+ Architectural decisions, patterns discovered, and conventions.
4
+
5
+ **What belongs here:** Architecture decisions, module patterns, code conventions.
6
+
7
+ ---
8
+
9
+ ## Project Structure
10
+ - `src/mlx_stack/` — main package (src layout)
11
+ - `src/mlx_stack/cli/` — Click CLI package
12
+ - `cli/__init__.py` — package init
13
+ - `cli/main.py` — CLI entry point with Click command group
14
+ - `cli/profile.py` — `mlx-stack profile` command
15
+ - `cli/config.py` — `mlx-stack config` commands
16
+ - `cli/init.py` — `mlx-stack init` command (stack + LiteLLM config generation)
17
+ - `cli/recommend.py` — `mlx-stack recommend` command
18
+ - `cli/models.py` — `mlx-stack models` command (local model listing + catalog browsing)
19
+ - `src/mlx_stack/core/` — shared business logic modules
20
+ - `core/hardware.py` — hardware detection (Apple Silicon profiling)
21
+ - `core/config.py` — configuration management (YAML-based)
22
+ - `core/catalog.py` — model catalog system (query API over YAML entries)
23
+ - `core/deps.py` — dependency management (auto-installing uv tools)
24
+ - `core/paths.py` — path utilities (`~/.mlx-stack/` and friends)
25
+ - `core/scoring.py` — recommendation scoring engine (intent-weighted composite scoring)
26
+ - `core/litellm_gen.py` — LiteLLM proxy config generation (model_list, router_settings, fallbacks)
27
+ - `core/stack_init.py` — stack initialization logic (port allocation, vllm_flags, overwrite protection)
28
+ - `core/models.py` — local model scanning, catalog listing, size formatting
29
+ - `src/mlx_stack/data/` — static data files
30
+ - `data/catalog/` — shipped YAML catalog files (15 models)
31
+ - `src/mlx_stack/utils/` — utility modules
32
+ - `tests/` — pytest tests
33
+ - `tests/fixtures/` — mock data (profiles, catalogs, etc.)
34
+
35
+ ## Conventions
36
+ - Click for CLI, Rich for terminal output
37
+ - PyYAML for all YAML operations
38
+ - httpx for HTTP requests (async not needed — use sync client)
39
+ - psutil for process management
40
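+ - All state lives in `~/.mlx-stack/` by default (the home directory can be overridden with `MLX_STACK_HOME`; the model storage location is separately configurable via `model-dir`)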
41
+ - Tests use `tmp_path` pytest fixture — NEVER touch real `~/.mlx-stack/`
42
+ - External commands (sysctl, system_profiler, subprocess) are always mocked in unit tests
43
+ - Click eager options (`--help`, `--version`) may exit before the group callback runs, so callback-based setup hooks should not be relied on for those code paths
44
+ - Note: The config module currently sends success output to stderr. Future features should use stdout for successful output and stderr only for errors/warnings.
45
+
46
+ ## Key Design Decisions
47
+ - One vllm-mlx process per model (ADR-003)
48
+ - vllm-mlx and litellm managed as pinned uv tools, auto-installed on first use
49
+ - Catalog schema: no int6, disk_size_gb per quant source, min_mlx_lm_version top-level, verified_on in separate data/verification.yaml
50
+ - 2 intents for MVP: balanced, agent-fleet (architecture supports more)
51
+ - 40% default memory budget of total unified memory
52
+ - Recommendation/init budget behavior: budget filtering is per-model eligibility (`model.memory_gb <= budget`); the combined memory of selected tiers can exceed the budget
53
+
54
+ ## Ops Layer (Milestone 5)
55
+
56
+ ### New Modules
57
+ - `core/log_rotation.py` — Copytruncate-based log rotation (copy → gzip → truncate)
58
+ - `core/log_viewer.py` — Log viewing/following/listing logic
59
+ - `core/watchdog.py` — Health polling loop, auto-restart, flap detection, daemon mode
60
+ - `core/launchd.py` — Plist generation/loading/unloading via plistlib + launchctl
61
+ - `cli/logs.py` — `mlx-stack logs` command
62
+ - `cli/watch.py` — `mlx-stack watch` command
63
+ - `cli/install.py` — `mlx-stack install` / `mlx-stack uninstall` commands
64
+
65
+ ### Key Integration Points
66
+ - `process.py:start_service` — Log file open mode changed from "w" to "a" for rotation compatibility
67
+ - `core/config.py` — 2 new keys: log-max-size-mb (int, default 50), log-max-files (int, default 5)
68
+ - `process.py:acquire_lock` — Watchdog uses per-restart lock, not held during polling
69
+ - `paths.py` — Watchdog PID at get_pids_dir()/watchdog.pid
70
+ - `stack_status.py:run_status` — Used by watchdog for health polling
71
+ - `process.py:start_service` / `stop_service` — Used by watchdog for restart
72
+ - `cli/main.py` — 4 new commands registered: logs (Diagnostics), watch (Lifecycle), install and uninstall (Lifecycle)
73
+
74
+ ### Log Rotation Strategy
75
+ - Copytruncate: copy log to archive, gzip compress, truncate original in-place
76
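+ - Service FDs remain valid: they still point to the same inode, and because service logs are opened in append mode ("a"), the next write lands at the new end of file (offset 0) after truncation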
77
+ - Naming: service.log.1.gz (most recent) → service.log.N.gz (oldest)
78
+ - Archives shifted up before new rotation
79
+ - No cooperation needed from child processes (vllm-mlx, litellm)
80
+
81
+ ### Log Follow Caveat
82
+ - `core/log_viewer.py:follow_log` detects truncation when `current_size < position`.
83
+ - Edge case: truncate + immediate rewrite back to exactly the previous byte length may not trigger truncation detection (`current_size == position`), so the stream can miss lines until new writes advance file size.
84
+
85
+ ### Watchdog Architecture
86
+ - Single foreground loop (or daemonized with --daemon)
87
+ - Polls get_service_status for all services each interval
88
+ - Restart trigger: crashed state only (PID file exists, process dead)
89
+ - NOT restarted: stopped (no PID file), healthy, degraded
90
+ - Flap detection: rolling window of restart timestamps per service
91
+ - Lock: acquire_lock only during actual restart, released immediately
92
+ - Log rotation: triggered as side-effect of each poll cycle
@@ -0,0 +1,23 @@
1
+ # Environment
2
+
3
+ Environment variables, external dependencies, and setup notes.
4
+
5
+ **What belongs here:** Required env vars, external API keys/services, dependency quirks, platform-specific notes.
6
+ **What does NOT belong here:** Service ports/commands (use `.factory/services.yaml`).
7
+
8
+ ---
9
+
10
+ ## Machine
11
+ - Apple MacBook Pro M5 Max, 128 GB unified memory, 18 CPU cores, 40 GPU cores
12
+ - macOS 26.x
13
+ - Python 3.14.3 (targeting 3.13+ compatibility)
14
+
15
+ ## Tools
16
+ - uv 0.10.12 (package manager)
17
+ - vllm-mlx v0.2.6 (installed as uv tool at ~/.local/bin/vllm-mlx)
18
+ - litellm (installed as uv tool at ~/.local/bin/litellm)
19
+ - For robust `uv tool list` parsing, set `NO_COLOR=1` when invoking uv to avoid ANSI escape sequences in output
20
+
21
+ ## External Dependencies
22
+ - HuggingFace Hub (for model downloads — optional HF_TOKEN for rate limiting)
23
+ - OpenRouter API (optional, for cloud fallback — key stored in ~/.mlx-stack/config.yaml)
@@ -0,0 +1,80 @@
1
+ # User Testing
2
+
3
+ Testing surface, required testing skills/tools, resource cost classification per surface.
4
+
5
+ **What belongs here:** How to test the user-facing surface, tools needed, concurrency limits.
6
+
7
+ ---
8
+
9
+ ## Validation Surface
10
+
11
+ **Surface:** CLI commands executed in terminal
12
+ **Tool:** Direct shell command execution (subprocess or Click CliRunner)
13
+ **Required tools:**
14
+ - Python 3.13+ with uv
15
+ - vllm-mlx v0.2.6 (installed as uv tool)
16
+ - litellm (installed as uv tool)
17
+ - curl (for HTTP endpoint verification)
18
+
19
+ **Setup needed for validation:**
20
+ - A downloaded model (small, e.g., qwen3.5-0.8b int4) for lifecycle testing
21
+ - `mlx-stack init --accept-defaults` to generate configs
22
+ - No browser or GUI tools needed
23
+
24
+ **Gaps:**
25
+ - Full integration testing of `up`/`down`/`status` requires downloaded models and sufficient memory
26
+ - Benchmark validation requires a running model server
27
+ - Tool-call benchmark requires a model that supports tool calling
28
+ - Foundation milestone user-testing run (2026-03-24) observed placeholder CLI surfaces for `models --catalog`, `up`, and `bench`; related catalog/dependency assertions were blocked until those commands are implemented.
29
+
30
+ ## Validation Concurrency
31
+
32
+ **Machine:** M5 Max 128GB, 18 cores, ~97GB free at baseline
33
+ **CLI surface:** Lightweight Python process execution (~100-200MB per validator)
34
+ **Max concurrent validators:** 5
35
+ **Rationale:** Each validator runs a CLI command (Python process ~200MB). 5 concurrent = ~1GB. Even with model servers running during lifecycle tests (~10-20GB per model), the machine has ample headroom. Budget derivation: ~97GB free at baseline * 0.7 = 67.9GB available headroom; using 70% of that headroom: 67.9GB * 0.7 = 47.5GB budget. Each lifecycle validator with a model server: ~12GB worst case, so 47.5GB / 12GB ≈ 3.9, rounded down. Max concurrent lifecycle validators: 3. For non-lifecycle tests: 5.
36
+
37
+ ## Flow Validator Guidance: CLI
38
+
39
+ - Use only terminal-based validation commands (`uv run mlx-stack ...`) and shell inspection commands.
40
+ - Enforce isolation with a unique `MLX_STACK_HOME` per validator (example: `/tmp/mlx-stack-user-testing/<group-id>`). Never reuse another validator's home.
41
+ - Do not read from or write to real `~/.mlx-stack/`; keep all generated files under each validator's assigned `MLX_STACK_HOME`.
42
+ - Keep evidence in the assigned mission evidence directory only.
43
+ - Stay within assigned assertion scope and avoid commands that mutate global/shared system state.
44
+
45
+ ## Recommendation milestone run notes (2026-03-24)
46
+
47
+ - `recommend` is currently display-only and does **not** persist `profile.json` when auto-detecting hardware.
48
+ - `models --catalog` currently does not expose filter flags for family/tag/capability on the CLI surface.
49
+ - `pull` and `bench` remain placeholder commands in this build, which blocks benchmark-save recommendation validation flows.
50
+ - For validator fixture scripting, prefer `uv run python` over system `python3` so project dependencies (e.g., PyYAML) are available.
51
+
52
+ ## Lifecycle milestone rerun notes (2026-03-24)
53
+
54
+ - In isolated lifecycle rerun flow `r2-g1-fixes`, macOS denied `psutil.net_connections(kind='inet')` with `AccessDenied`; port conflict output fell back to `PID 0 (<unknown>)` even though preflight conflict skipping worked. Treat owner-resolution checks as potentially permission-sensitive on this host.
55
+
56
+ ## Tooling milestone run notes (2026-03-24)
57
+
58
+ - Tooling rerun round 4 confirms `bench qwen3-8b` now passes tool-calling validation (`✓ Valid tool call — round-trip: 5.89s`), resolving VAL-BENCH-008.
59
+
60
+ - Catalog repository availability has drifted: `qwen3.5-*` int4 repos referenced in catalog returned `RepositoryNotFound` during live pull testing. `gemma3-*`, `deepseek-r1-8b`, and `qwen3-8b` int4 repos were reachable.
61
+ - The current Hugging Face CLI package installs `hf` (not `huggingface-cli`). For live pull validation, a local wrapper script (`/tmp/huggingface-cli -> hf`) was used so `mlx-stack pull` subprocess invocation could execute.
62
+ - Tooling rerun (round 2) confirms pull progress is now user-visible with incremental percent updates (`0% ... 100%`) and temp bench-instance flows now start successfully (`bench <model-id>` and `bench --save` pass, including non-conflicting temp-port binding evidence).
63
+ - Remaining tooling gaps after tooling rerun round 2 were: (1) network-error pull still surfaced long upstream traceback output before the concise error summary, and (2) tool-calling benchmark still reported `No tool calls in response` for `qwen3-8b`.
64
+ - Tooling rerun round 3 confirmed network-error pull output is now traceback-free for users (VAL-PULL-008 passed); tool-calling benchmark still fails for `qwen3-8b` with `No tool calls in response` (VAL-BENCH-008).
65
+
66
+ ## Misc-cross-area milestone run notes (2026-03-24)
67
+
68
+ - User-testing flow `r1-g1-cross-flows` validated `VAL-CROSS-001`, `VAL-CROSS-012`, and `VAL-CROSS-013` as passing on the real CLI surface in isolated `MLX_STACK_HOME` mode.
69
+ - `VAL-CROSS-007` remained blocked in this environment because host port `5000` was already occupied by a non-mlx-stack service; `up` correctly reported a conflict and skipped LiteLLM at that port.
70
+ - A workaround run with `litellm-port 5001` confirmed the same config-propagation/startup behavior when a free port is used.
71
+ - Rerun flow `r2-g4-cross-port5050` (after contract update to port `5050`) passed `VAL-CROSS-007`: `up` served LiteLLM on `127.0.0.1:5050` and `/v1/models` returned HTTP 200 while `4000` stayed inactive.
72
+ - Setup finding: the host `litellm` uv tool runtime was missing proxy dependencies (`websockets`, `backoff`, `fastapi`, etc.). Installing proxy extras (`litellm[proxy]`) in that tool environment unblocked LiteLLM startup for user-testing flows.
73
+
74
+ ## Ops milestone run notes (2026-04-01)
75
+
76
+ - On this host, `user-testing-flow-validator` subagent runs intermittently exited early with `insufficient permission to proceed ... Re-run with --skip-permissions-unsafe`. Workaround was to continue validation with isolated direct CLI/test execution while preserving evidence artifacts.
77
+ - Repo-level pytest defaults include quiet output, so assertion-level mapping is hard to prove from `-q` logs. For per-assertion evidence, use:
78
+ - `uv run pytest <files> -o addopts='' -vv`
79
+ which emits test names and pass lines suitable for assertion mapping in synthesis.
80
+
@@ -0,0 +1,9 @@
1
+ commands:
2
+ install: uv sync
3
+ test: uv run pytest -x -q --tb=short
4
+ typecheck: uv run python -m pyright
5
+ lint: uv run ruff check src/ tests/
6
+ format: uv run ruff format src/ tests/
7
+ coverage: uv run pytest --cov=src/mlx_stack --cov-report=term-missing
8
+
9
+ services: {}
@@ -0,0 +1,5 @@
1
+ {
2
+ "enabledPlugins": {
3
+ "core@factory-plugins": true
4
+ }
5
+ }
@@ -0,0 +1,350 @@
1
+ # CLI Feature Worker
2
+
3
+ You are a CLI feature worker for **mlx-stack**, a Python CLI tool that manages local LLM infrastructure on Apple Silicon. You implement features end-to-end: failing tests first, then production code, then verification.
4
+
5
+ ---
6
+
7
+ ## Project Structure
8
+
9
+ ```
10
+ src/mlx_stack/
11
+ ├── __init__.py # Package version
12
+ ├── cli/
13
+ │ ├── __init__.py # Click group (main entry point)
14
+ │ └── <command>.py # One file per CLI command
15
+ ├── core/
16
+ │ ├── __init__.py
17
+ │ ├── hardware.py # Hardware detection
18
+ │ ├── catalog.py # Model catalog loading/querying
19
+ │ ├── config.py # Config persistence (~/.mlx-stack/config.yaml)
20
+ │ ├── scoring.py # Recommendation scoring engine
21
+ │ ├── process.py # Process management (up/down/status)
22
+ │ ├── deps.py # Dependency management (uv tool install)
23
+ │ └── models.py # Model download/inventory
24
+ ├── data/
25
+ │ ├── catalog/ # YAML catalog entries (one per model)
26
+ │ └── verification.yaml
27
+ └── utils/
28
+ └── display.py # Rich output helpers
29
+ tests/
30
+ ├── conftest.py # Shared fixtures (tmp_path, mock hardware, etc.)
31
+ ├── unit/
32
+ │ ├── test_<module>.py # Unit tests for core/ modules
33
+ │ └── test_cli_<cmd>.py # CLI command tests via CliRunner
34
+ └── integration/ # Real system tests (marked, optional)
35
+ ```
36
+
37
+ **Package:** `mlx_stack` | **CLI entry point:** `mlx-stack` | **Config dir:** `~/.mlx-stack/`
38
+
39
+ ---
40
+
41
+ ## Technology Stack
42
+
43
+ - **Python 3.13+** with full type annotations
44
+ - **Click** — CLI framework, command groups
45
+ - **Rich** — All terminal output (tables, panels, progress bars, styled text)
46
+ - **httpx** — HTTP client for health checks and API calls
47
+ - **psutil** — Process monitoring
48
+ - **PyYAML** — Config and catalog file handling
49
+ - **pytest + pytest-cov** — Testing (80%+ coverage on `core/`)
50
+ - **uv** — Package manager (`uv run` for all commands)
51
+ - **Pyright** — Static type checking
52
+
53
+ ---
54
+
55
+ ## Workflow: TDD Feature Implementation
56
+
57
+ Follow this exact sequence for every feature. Do not skip steps.
58
+
59
+ ### Step 1: Understand the Feature
60
+
61
+ 1. Read the task description fully. Identify which CLI command(s) and core module(s) are involved.
62
+ 2. Check existing code — read the relevant files in `src/mlx_stack/cli/` and `src/mlx_stack/core/` to understand current state.
63
+ 3. Identify the public API: what Click commands, function signatures, and data structures are needed.
64
+
65
+ ### Step 2: Write Failing Tests First
66
+
67
+ Write tests BEFORE any production code. Tests define the contract.
68
+
69
+ **CLI command tests** (in `tests/unit/test_cli_<command>.py`):
70
+ ```python
71
+ from click.testing import CliRunner
72
+ from mlx_stack.cli import cli # the main Click group
73
+
74
+ def test_<command>_basic(tmp_path, monkeypatch):
75
+ """<Command> produces expected output for valid input."""
76
+ # Redirect config dir to tmp_path to avoid touching real ~/.mlx-stack/
77
+ monkeypatch.setenv("MLX_STACK_HOME", str(tmp_path))
78
+
79
+ runner = CliRunner()
80
+ result = runner.invoke(cli, ["<command>", "<args>"])
81
+
82
+ assert result.exit_code == 0
83
+ assert "<expected output fragment>" in result.output
84
+
85
+ def test_<command>_error_case(tmp_path, monkeypatch):
86
+ """<Command> shows user-friendly error, no stack trace."""
87
+ monkeypatch.setenv("MLX_STACK_HOME", str(tmp_path))
88
+
89
+ runner = CliRunner()
90
+ result = runner.invoke(cli, ["<command>", "--bad-flag"])
91
+
92
+ assert result.exit_code != 0
93
+ assert "Traceback" not in result.output
94
+ ```
95
+
96
+ **Core module tests** (in `tests/unit/test_<module>.py`):
97
+ ```python
98
+ import pytest
99
+ from mlx_stack.core.<module> import <function_under_test>
100
+
101
+ def test_<function>_happy_path(tmp_path):
102
+ """<Function> returns expected result for valid input."""
103
+ result = <function_under_test>(valid_input, config_dir=tmp_path)
104
+ assert result == expected
105
+
106
+ def test_<function>_edge_case(tmp_path):
107
+ """<Function> handles <edge case> gracefully."""
108
+ result = <function_under_test>(edge_input, config_dir=tmp_path)
109
+ assert result == expected_edge
110
+
111
+ def test_<function>_invalid_input():
112
+ """<Function> raises ValueError for invalid input."""
113
+ with pytest.raises(ValueError, match="<expected message>"):
114
+ <function_under_test>(invalid_input)
115
+ ```
116
+
117
+ **Critical test rules:**
118
+ - **NEVER** read from or write to the real `~/.mlx-stack/` directory. Always use `tmp_path` or `monkeypatch.setenv("MLX_STACK_HOME", str(tmp_path))`.
119
+ - **ALWAYS** mock external system calls:
120
+ - `subprocess.run` / `subprocess.Popen` for sysctl, system_profiler, vllm-mlx, litellm
121
+ - `httpx.Client` / `httpx.AsyncClient` for health checks and API calls
122
+ - `psutil.Process` for process monitoring
123
+ - `shutil.disk_usage` for disk space checks
124
+ - Use `pytest.fixture` for reusable test state (mock hardware profiles, sample catalog entries, tmp config dirs).
125
+ - Test both success and error paths. Error paths must never show Python stack traces — only user-friendly Rich-formatted messages.
126
+
127
+ ### Step 3: Run Tests — Confirm They Fail
128
+
129
+ ```bash
130
+ uv run pytest tests/unit/test_<relevant_files>.py -v
131
+ ```
132
+
133
+ All new tests MUST fail at this point (ImportError or AssertionError). This confirms the tests are actually testing something. If a test passes before implementation, the test is wrong — fix it.
134
+
135
+ ### Step 4: Implement the Feature
136
+
137
+ Now write the minimum production code to make all tests pass.
138
+
139
+ **CLI command file** (`src/mlx_stack/cli/<command>.py`):
140
+ ```python
141
+ """mlx-stack <command> — <one-line description>."""
142
+
143
+ import click
144
+ from rich.console import Console
145
+ from rich.table import Table
146
+
147
+ from mlx_stack.core.<module> import <core_function>
148
+
149
+ console = Console()
150
+
151
+ @click.command()
152
+ @click.option("--flag", help="Description.")
153
+ @click.pass_context
154
+ def <command>(ctx: click.Context, flag: str | None) -> None:
155
+ """<Docstring shown in --help>."""
156
+ try:
157
+ result = <core_function>(...)
158
+ # Use Rich for ALL output
159
+ table = Table(title="...")
160
+ table.add_column(...)
161
+ console.print(table)
162
+ except <ExpectedError> as e:
163
+ console.print(f"[red]Error:[/red] {e}")
164
+ raise SystemExit(1)
165
+ ```
166
+
167
+ **Core module** (`src/mlx_stack/core/<module>.py`):
168
+ ```python
169
+ """<Module description>."""
170
+
171
+ from __future__ import annotations
172
+
173
+ from dataclasses import dataclass
174
+ from pathlib import Path
175
+ # ... typed, documented, specific exception types only (no bare `except:`)
176
+
177
+ def <function>(input: InputType, *, config_dir: Path | None = None) -> OutputType:
178
+ """<Docstring with Args/Returns/Raises>."""
179
+ ...
180
+ ```
181
+
182
+ **Implementation rules:**
183
+ - Full type annotations on every function signature. Use `from __future__ import annotations`.
184
+ - Docstrings on all public functions and classes.
185
+ - The config directory must default to `~/.mlx-stack/` but be overridable via `MLX_STACK_HOME` env var or function parameter — this is how tests isolate themselves.
186
+ - Use `dataclass` or `TypedDict` for structured data, never raw dicts for domain objects.
187
+ - Rich for ALL terminal output — no bare `print()` calls.
188
+ - Handle errors with specific exception types. Catch at the CLI layer and display with Rich. Never let stack traces reach the user.
189
+ - Register new commands in `src/mlx_stack/cli/__init__.py`:
190
+ ```python
191
+ from mlx_stack.cli.<command> import <command>
192
+ cli.add_command(<command>)
193
+ ```
194
+
195
+ ### Step 5: Run Tests — Confirm They Pass
196
+
197
+ ```bash
198
+ uv run pytest tests/unit/ -v --tb=short
199
+ ```
200
+
201
+ All tests must pass. Fix any failures before proceeding. Do not move on with failing tests.
202
+
203
+ ### Step 6: Run Full Test Suite + Type Checking
204
+
205
+ ```bash
206
+ # Full test suite with coverage
207
+ uv run pytest tests/ -v --cov=mlx_stack --cov-report=term-missing
208
+
209
+ # Type checking
210
+ uv run pyright src/mlx_stack/
211
+ ```
212
+
213
+ **Targets:**
214
+ - All tests pass
215
+ - Coverage on `src/mlx_stack/core/` ≥ 80%
216
+ - Zero Pyright errors (warnings acceptable if justified)
217
+
218
+ Fix any issues before proceeding.
219
+
220
+ ### Step 7: Manual Verification
221
+
222
+ Run the actual CLI command and visually verify the output is correct and well-formatted.
223
+
224
+ ```bash
225
+ # For safe commands (profile, config, models, recommend):
226
+ uv run mlx-stack <command> <args>
227
+
228
+ # For commands that start processes (up, bench):
229
+ uv run mlx-stack <command> --dry-run
230
+ ```
231
+
232
+ Check:
233
+ - Output uses Rich formatting (colors, tables, panels) — not plain text
234
+ - Help text is accurate: `uv run mlx-stack <command> --help`
235
+ - Error cases show friendly messages, not tracebacks
236
+ - Exit codes are correct (0 for success, non-zero for errors)
237
+
238
+ If manual verification reveals issues, fix them and re-run tests.
239
+
240
+ ---
241
+
242
+ ## Mocking Patterns Reference
243
+
244
+ ### Mock Hardware Detection (sysctl / system_profiler)
245
+ ```python
246
+ @pytest.fixture
247
+ def mock_m4_pro(monkeypatch):
248
+ """Mock an M4 Pro with 48GB unified memory."""
249
+ def mock_sysctl(cmd, **kwargs):
250
+ responses = {
251
+ "sysctl -n machdep.cpu.brand_string": "Apple M4 Pro",
252
+ "sysctl -n hw.memsize": "51539607552", # 48GB
253
+ }
254
+ return subprocess.CompletedProcess(cmd, 0, stdout=responses.get(cmd if isinstance(cmd, str) else " ".join(cmd), ""))
255
+
256
+ monkeypatch.setattr("subprocess.run", mock_sysctl)
257
+ # Also mock system_profiler for GPU core count
258
+ ...
259
+ ```
260
+
261
+ ### Mock Subprocess for Process Management
262
+ ```python
263
+ @pytest.fixture
264
+ def mock_processes(monkeypatch, tmp_path):
265
+ """Mock vllm-mlx and litellm process spawning."""
266
+ pids = iter([1001, 1002, 1003])
267
+
268
+ def mock_popen(cmd, **kwargs):
269
+ mock = MagicMock()
270
+ mock.pid = next(pids)
271
+ mock.poll.return_value = None # process is running
272
+ return mock
273
+
274
+ monkeypatch.setattr("subprocess.Popen", mock_popen)
275
+ ```
276
+
277
+ ### Mock HTTP Health Checks
278
+ ```python
279
+ @pytest.fixture
280
+ def mock_health_ok(monkeypatch):
281
+ """Mock healthy HTTP responses from model servers."""
282
+ def mock_get(self, url, **kwargs):
283
+ return httpx.Response(200, json={"status": "ok"})
284
+
285
+ monkeypatch.setattr("httpx.Client.get", mock_get)
286
+ ```
287
+
288
+ ### Isolated Config Directory
289
+ ```python
290
+ @pytest.fixture
291
+ def mlx_home(tmp_path, monkeypatch):
292
+ """Redirect MLX_STACK_HOME to a temp directory."""
293
+ home = tmp_path / ".mlx-stack"
294
+ home.mkdir()
295
+ monkeypatch.setenv("MLX_STACK_HOME", str(home))
296
+ return home
297
+ ```
298
+
299
+ ---
300
+
301
+ ## Handoff Requirements
302
+
303
+ When your implementation is complete, report the following:
304
+
305
+ ### Tests Added
306
+ List every new test file and the test functions within it:
307
+ ```
308
+ tests/unit/test_cli_profile.py
309
+ - test_profile_detects_hardware
310
+ - test_profile_writes_profile_json
311
+ - test_profile_rejects_non_apple_silicon
312
+ - test_profile_unknown_chip_estimates_bandwidth
313
+ tests/unit/test_hardware.py
314
+ - test_detect_m4_pro
315
+ - test_detect_unknown_m_chip
316
+ - test_detect_intel_raises
317
+ - test_bandwidth_lookup_known_chips
318
+ - test_bandwidth_estimation_formula
319
+ ```
320
+
321
+ ### Commands Run (with output summary)
322
+ ```
323
+ $ uv run pytest tests/unit/ -v --cov=mlx_stack --cov-report=term-missing
324
+ → 23 passed, 0 failed, core/hardware.py: 94% coverage
325
+
326
+ $ uv run pyright src/mlx_stack/
327
+ → 0 errors, 0 warnings
328
+
329
+ $ uv run mlx-stack profile
330
+ → Rich table output showing M5 Max, 128GB, 40 GPU cores, 546 GB/s bandwidth
331
+ ```
332
+
333
+ ### Files Created or Modified
334
+ List every file touched with a one-line description of the change:
335
+ ```
336
+ src/mlx_stack/core/hardware.py — NEW: hardware detection module (detect_hardware, estimate_bandwidth)
337
+ src/mlx_stack/cli/profile.py — NEW: profile command implementation
338
+ src/mlx_stack/cli/__init__.py — MODIFIED: registered profile command
339
+ tests/unit/test_hardware.py — NEW: 5 unit tests for hardware detection
340
+ tests/unit/test_cli_profile.py — NEW: 4 CLI tests via CliRunner
341
+ tests/conftest.py — MODIFIED: added mock_m4_pro and mlx_home fixtures
342
+ ```
343
+
344
+ ### Discovered Issues
345
+ Note anything that came up during implementation that the next worker or orchestrator should know about:
346
+ ```
347
+ - system_profiler XML parsing is slow (~2s); consider caching in profile.json
348
+ - psutil not detecting vllm-mlx by name; may need PID-file-based tracking instead
349
+ - (or, if nothing came up:) (none) — clean implementation, no blockers
350
+ ```
@@ -0,0 +1,33 @@
1
+ {
2
+ "featureId": "configuration-management",
3
+ "reviewedAt": "2026-03-24T00:50:25Z",
4
+ "commitId": "725b662",
5
+ "transcriptSkeletonReviewed": true,
6
+ "diffReviewed": true,
7
+ "status": "pass",
8
+ "codeReview": {
9
+ "summary": "The feature implementation in commit 725b662 covers the required config module and CLI surface (set/get/list/reset), including key validation, typed parsing, defaults, persistence, masking, corrupt/empty file handling, and reset confirmation behavior. I found one non-blocking UX/scripting issue around output streams.",
10
+ "issues": [
11
+ {
12
+ "file": "src/mlx_stack/cli/config.py",
13
+ "line": 24,
14
+ "severity": "non_blocking",
15
+ "description": "The module-level console is configured as `Console(stderr=True)` and is used for successful `config set/get/reset` output (e.g., `console.print(display)` at line 72), so `mlx-stack config get ...` writes value output to stderr. This is inconsistent with `config list` (stdout via `out = Console()` at line 95) and makes stdout capture in shell scripts less ergonomic."
16
+ }
17
+ ]
18
+ },
19
+ "sharedStateObservations": [
20
+ {
21
+ "area": "conventions",
22
+ "observation": "Shared guidance documents are out of sync with the actual CLI module layout, which can mislead workers during feature implementation.",
23
+ "evidence": "Mission AGENTS.md says commands live under `commands/` (lines 16-17), and `.factory/library/architecture.md` says `src/mlx_stack/cli.py` + `src/mlx_stack/commands/` (lines 11-12), while the real codebase uses `src/mlx_stack/cli/` modules (see `src/mlx_stack/cli/main.py`, `src/mlx_stack/cli/config.py`, and directory listing of `src/mlx_stack`)."
24
+ },
25
+ {
26
+ "area": "skills",
27
+ "observation": "Feature metadata requires `cli-feature`, but worker handoff reports that skill was unavailable at runtime; this indicates a registry/discoverability gap between mission metadata and executable skills.",
28
+ "evidence": "Feature entry has `skillName: cli-feature` in mission `features.json`; handoff JSON reports deviation 'cli-feature skill was not found in available skills' and suggests ensuring it is available (`handoffs/2026-03-24T00-41-41-697Z__configuration-management__86a87c3a-0806-4220-9886-13cfa289ee9a.json`, lines 142-148)."
29
+ }
30
+ ],
31
+ "addressesFailureFrom": null,
32
+ "summary": "Reviewed configuration-management for foundation using feature metadata, handoff, commit diff, transcript skeleton, and skill spec. Implementation is functionally complete for the requested behavior, with one non-blocking stderr/stdout consistency issue and two shared-state documentation/skill-availability observations."
33
+ }
@@ -0,0 +1,50 @@
1
+ {
2
+ "featureId": "dependency-management",
3
+ "reviewedAt": "2026-03-24T00:50:27Z",
4
+ "commitId": "e607176",
5
+ "transcriptSkeletonReviewed": true,
6
+ "diffReviewed": true,
7
+ "status": "fail",
8
+ "codeReview": {
9
+ "summary": "The feature introduces the expected module and broad unit coverage, but two implementation defects break core dependency detection behavior on real `uv tool list` output and prevent correct detection of installed `vllm-mlx`.",
10
+ "issues": [
11
+ {
12
+ "file": "src/mlx_stack/core/deps.py",
13
+ "line": 34,
14
+ "severity": "blocking",
15
+ "description": "Incorrect binary mapping: `vllm-mlx` is mapped to `vllm`, but the installed uv tool executable is `vllm-mlx`. This causes false `installed=False` results and can trigger unnecessary installs/post-install failures. Evidence: `which vllm-mlx` resolves to `/Users/weae1504/.local/bin/vllm-mlx`, while `which vllm` is absent."
16
+ },
17
+ {
18
+ "file": "src/mlx_stack/core/deps.py",
19
+ "line": 122,
20
+ "severity": "blocking",
21
+ "description": "Version parsing assumes plain lines like `<tool> v<version>`, but actual captured `uv tool list` output includes ANSI escape sequences and executable bullet lines. As a result, `_get_installed_version` returns `None` for installed tools and version-mismatch warnings are skipped. Evidence: captured stdout lines include `\\u001b[1mlitellm v1.82.6\\u001b[0m`, and `check_dependency('litellm')` returned `installed_version=None`."
22
+ },
23
+ {
24
+ "file": "tests/unit/test_deps.py",
25
+ "line": 71,
26
+ "severity": "non_blocking",
27
+ "description": "Tests encode incorrect assumptions about real environment behavior (`vllm` binary name and unformatted `uv tool list` output), so they do not catch the above production defects."
28
+ }
29
+ ]
30
+ },
31
+ "sharedStateObservations": [
32
+ {
33
+ "area": "skills",
34
+ "observation": "Feature metadata requires `cli-feature`, and the repo contains `.factory/skills/cli-feature/SKILL.md`, but the worker could not invoke that skill in runtime.",
35
+ "evidence": "Transcript skeleton for worker session `b001445d-222c-4684-934e-b1b39be237ad` shows `Tool: Skill {\"skill\":\"cli-feature\"}` followed by `Error: Skill \"cli-feature\" not found`; skill file exists at `/Users/weae1504/Projects/mlx-stack/.factory/skills/cli-feature/SKILL.md`."
36
+ },
37
+ {
38
+ "area": "conventions",
39
+ "observation": "Mission AGENTS conventions describe a `commands/` CLI layout, but the repository and skill documentation use `cli/`. This mismatch can mislead workers about file placement and registration patterns.",
40
+ "evidence": "Mission AGENTS.md lines 16-17 specify `src/mlx_stack/` with `commands/` and one command per module in `commands/`; actual repo uses `src/mlx_stack/cli/` and SKILL.md project structure also documents `cli/`."
41
+ },
42
+ {
43
+ "area": "knowledge",
44
+ "observation": "Shared state does not document `uv tool list` output quirks (ANSI formatting and executable sub-lines), which directly affects reliable parser implementation.",
45
+ "evidence": "Captured `subprocess.run([uv, 'tool', 'list'], capture_output=True, text=True)` output begins with `\\u001b[1mlitellm v1.82.6\\u001b[0m` then `- litellm` style lines; current parser expects plain `tool version` text."
46
+ }
47
+ ],
48
+ "addressesFailureFrom": null,
49
+ "summary": "Review failed: dependency detection has two blocking runtime issues (wrong `vllm-mlx` binary lookup and fragile `uv tool list` parsing), so required behavior is not reliable despite passing unit tests."
50
+ }