mlx-stack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. mlx_stack/__init__.py +5 -0
  2. mlx_stack/_version.py +24 -0
  3. mlx_stack/cli/__init__.py +5 -0
  4. mlx_stack/cli/bench.py +221 -0
  5. mlx_stack/cli/config.py +166 -0
  6. mlx_stack/cli/down.py +109 -0
  7. mlx_stack/cli/init.py +180 -0
  8. mlx_stack/cli/install.py +165 -0
  9. mlx_stack/cli/logs.py +234 -0
  10. mlx_stack/cli/main.py +187 -0
  11. mlx_stack/cli/models.py +304 -0
  12. mlx_stack/cli/profile.py +65 -0
  13. mlx_stack/cli/pull.py +134 -0
  14. mlx_stack/cli/recommend.py +397 -0
  15. mlx_stack/cli/status.py +111 -0
  16. mlx_stack/cli/up.py +163 -0
  17. mlx_stack/cli/watch.py +252 -0
  18. mlx_stack/core/__init__.py +1 -0
  19. mlx_stack/core/benchmark.py +1182 -0
  20. mlx_stack/core/catalog.py +560 -0
  21. mlx_stack/core/config.py +471 -0
  22. mlx_stack/core/deps.py +323 -0
  23. mlx_stack/core/hardware.py +304 -0
  24. mlx_stack/core/launchd.py +531 -0
  25. mlx_stack/core/litellm_gen.py +188 -0
  26. mlx_stack/core/log_rotation.py +231 -0
  27. mlx_stack/core/log_viewer.py +386 -0
  28. mlx_stack/core/models.py +639 -0
  29. mlx_stack/core/paths.py +79 -0
  30. mlx_stack/core/process.py +887 -0
  31. mlx_stack/core/pull.py +815 -0
  32. mlx_stack/core/scoring.py +611 -0
  33. mlx_stack/core/stack_down.py +317 -0
  34. mlx_stack/core/stack_init.py +524 -0
  35. mlx_stack/core/stack_status.py +229 -0
  36. mlx_stack/core/stack_up.py +856 -0
  37. mlx_stack/core/watchdog.py +744 -0
  38. mlx_stack/data/__init__.py +1 -0
  39. mlx_stack/data/catalog/__init__.py +1 -0
  40. mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
  41. mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
  42. mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
  43. mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
  44. mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
  45. mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
  46. mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
  47. mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
  48. mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
  49. mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
  50. mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
  51. mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
  52. mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
  53. mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
  54. mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
  55. mlx_stack/py.typed +1 -0
  56. mlx_stack/utils/__init__.py +1 -0
  57. mlx_stack-0.1.0.dist-info/METADATA +397 -0
  58. mlx_stack-0.1.0.dist-info/RECORD +61 -0
  59. mlx_stack-0.1.0.dist-info/WHEEL +4 -0
  60. mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
  61. mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1 @@
1
+ """Static data files for mlx-stack."""
@@ -0,0 +1 @@
1
+ """Model catalog YAML data files."""
@@ -0,0 +1,46 @@
1
+ id: deepseek-r1-32b
2
+ name: DeepSeek R1 32B
3
+ family: DeepSeek R1
4
+ params_b: 32.0
5
+ architecture: mamba2-hybrid
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-32B-4bit
10
+ disk_size_gb: 18.0
11
+ int8:
12
+ hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-32B-8bit
13
+ disk_size_gb: 34.0
14
+ bf16:
15
+ hf_repo: deepseek-ai/DeepSeek-R1-0528-Qwen3-32B
16
+ disk_size_gb: 65.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: false
20
+ tool_call_parser: null
21
+ thinking: true
22
+ reasoning_parser: deepseek_r1
23
+ vision: false
24
+ quality:
25
+ overall: 85
26
+ coding: 86
27
+ reasoning: 90
28
+ instruction_following: 80
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 26.0
32
+ gen_tps: 15.0
33
+ memory_gb: 20.0
34
+ m4-max-128:
35
+ prompt_tps: 40.0
36
+ gen_tps: 23.0
37
+ memory_gb: 20.0
38
+ m5-max-128:
39
+ prompt_tps: 44.0
40
+ gen_tps: 25.0
41
+ memory_gb: 20.0
42
+ tags:
43
+ - reasoning
44
+ - thinking
45
+ - quality
46
+ - long-context
@@ -0,0 +1,45 @@
1
+ id: deepseek-r1-8b
2
+ name: DeepSeek R1 8B
3
+ family: DeepSeek R1
4
+ params_b: 8.0
5
+ architecture: mamba2-hybrid
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-8B-4bit
10
+ disk_size_gb: 4.5
11
+ int8:
12
+ hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-8B-8bit
13
+ disk_size_gb: 8.5
14
+ bf16:
15
+ hf_repo: deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
16
+ disk_size_gb: 16.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: false
20
+ tool_call_parser: null
21
+ thinking: true
22
+ reasoning_parser: deepseek_r1
23
+ vision: false
24
+ quality:
25
+ overall: 70
26
+ coding: 72
27
+ reasoning: 75
28
+ instruction_following: 64
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 90.0
32
+ gen_tps: 50.0
33
+ memory_gb: 5.5
34
+ m4-max-128:
35
+ prompt_tps: 135.0
36
+ gen_tps: 74.0
37
+ memory_gb: 5.5
38
+ m5-max-128:
39
+ prompt_tps: 148.0
40
+ gen_tps: 82.0
41
+ memory_gb: 5.5
42
+ tags:
43
+ - reasoning
44
+ - thinking
45
+ - long-context
@@ -0,0 +1,45 @@
1
+ id: gemma3-12b
2
+ name: Gemma 3 12B
3
+ family: Gemma 3
4
+ params_b: 12.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/gemma-3-12b-it-4bit
10
+ disk_size_gb: 7.0
11
+ int8:
12
+ hf_repo: mlx-community/gemma-3-12b-it-8bit
13
+ disk_size_gb: 13.0
14
+ bf16:
15
+ hf_repo: google/gemma-3-12b-it
16
+ disk_size_gb: 25.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: false
20
+ tool_call_parser: null
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: true
24
+ quality:
25
+ overall: 72
26
+ coding: 68
27
+ reasoning: 70
28
+ instruction_following: 75
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 62.0
32
+ gen_tps: 35.0
33
+ memory_gb: 8.2
34
+ m4-max-128:
35
+ prompt_tps: 90.0
36
+ gen_tps: 52.0
37
+ memory_gb: 8.2
38
+ m5-max-128:
39
+ prompt_tps: 100.0
40
+ gen_tps: 57.0
41
+ memory_gb: 8.2
42
+ tags:
43
+ - vision
44
+ - balanced
45
+ - multimodal
@@ -0,0 +1,45 @@
1
+ id: gemma3-27b
2
+ name: Gemma 3 27B
3
+ family: Gemma 3
4
+ params_b: 27.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/gemma-3-27b-it-4bit
10
+ disk_size_gb: 15.0
11
+ int8:
12
+ hf_repo: mlx-community/gemma-3-27b-it-8bit
13
+ disk_size_gb: 28.0
14
+ bf16:
15
+ hf_repo: google/gemma-3-27b-it
16
+ disk_size_gb: 54.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: false
20
+ tool_call_parser: null
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: true
24
+ quality:
25
+ overall: 80
26
+ coding: 76
27
+ reasoning: 78
28
+ instruction_following: 83
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 30.0
32
+ gen_tps: 18.0
33
+ memory_gb: 17.0
34
+ m4-max-128:
35
+ prompt_tps: 45.0
36
+ gen_tps: 26.0
37
+ memory_gb: 17.0
38
+ m5-max-128:
39
+ prompt_tps: 50.0
40
+ gen_tps: 29.0
41
+ memory_gb: 17.0
42
+ tags:
43
+ - vision
44
+ - quality
45
+ - multimodal
@@ -0,0 +1,45 @@
1
+ id: gemma3-4b
2
+ name: Gemma 3 4B
3
+ family: Gemma 3
4
+ params_b: 4.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/gemma-3-4b-it-4bit
10
+ disk_size_gb: 2.3
11
+ int8:
12
+ hf_repo: mlx-community/gemma-3-4b-it-8bit
13
+ disk_size_gb: 4.4
14
+ bf16:
15
+ hf_repo: google/gemma-3-4b-it
16
+ disk_size_gb: 8.5
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: false
20
+ tool_call_parser: null
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: true
24
+ quality:
25
+ overall: 52
26
+ coding: 45
27
+ reasoning: 48
28
+ instruction_following: 58
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 160.0
32
+ gen_tps: 78.0
33
+ memory_gb: 3.2
34
+ m4-max-128:
35
+ prompt_tps: 230.0
36
+ gen_tps: 115.0
37
+ memory_gb: 3.2
38
+ m5-max-128:
39
+ prompt_tps: 250.0
40
+ gen_tps: 125.0
41
+ memory_gb: 3.2
42
+ tags:
43
+ - vision
44
+ - fast-inference
45
+ - multimodal
@@ -0,0 +1,44 @@
1
+ id: llama3.3-8b
2
+ name: Llama 3.3 8B
3
+ family: Llama 3.3
4
+ params_b: 8.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Llama-3.3-8B-Instruct-4bit
10
+ disk_size_gb: 4.5
11
+ int8:
12
+ hf_repo: mlx-community/Llama-3.3-8B-Instruct-8bit
13
+ disk_size_gb: 8.5
14
+ bf16:
15
+ hf_repo: meta-llama/Llama-3.3-8B-Instruct
16
+ disk_size_gb: 16.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: llama3_json
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: false
24
+ quality:
25
+ overall: 63
26
+ coding: 60
27
+ reasoning: 58
28
+ instruction_following: 67
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 100.0
32
+ gen_tps: 55.0
33
+ memory_gb: 5.5
34
+ m4-max-128:
35
+ prompt_tps: 148.0
36
+ gen_tps: 82.0
37
+ memory_gb: 5.5
38
+ m5-max-128:
39
+ prompt_tps: 162.0
40
+ gen_tps: 90.0
41
+ memory_gb: 5.5
42
+ tags:
43
+ - balanced
44
+ - agent-ready
@@ -0,0 +1,41 @@
1
+ id: nemotron-49b
2
+ name: Nemotron 49B
3
+ family: Nemotron
4
+ params_b: 49.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Nemotron-Ultra-49B-4bit
10
+ disk_size_gb: 27.0
11
+ int8:
12
+ hf_repo: mlx-community/Nemotron-Ultra-49B-8bit
13
+ disk_size_gb: 52.0
14
+ bf16:
15
+ hf_repo: nvidia/Nemotron-Ultra-49B
16
+ disk_size_gb: 100.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: nemotron
23
+ vision: false
24
+ quality:
25
+ overall: 87
26
+ coding: 85
27
+ reasoning: 88
28
+ instruction_following: 88
29
+ benchmarks:
30
+ m4-max-128:
31
+ prompt_tps: 22.0
32
+ gen_tps: 13.0
33
+ memory_gb: 30.0
34
+ m5-max-128:
35
+ prompt_tps: 25.0
36
+ gen_tps: 15.0
37
+ memory_gb: 30.0
38
+ tags:
39
+ - quality
40
+ - thinking
41
+ - agent-ready
@@ -0,0 +1,44 @@
1
+ id: nemotron-8b
2
+ name: Nemotron 8B
3
+ family: Nemotron
4
+ params_b: 8.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Nemotron-Mini-8B-4bit
10
+ disk_size_gb: 4.5
11
+ int8:
12
+ hf_repo: mlx-community/Nemotron-Mini-8B-8bit
13
+ disk_size_gb: 8.5
14
+ bf16:
15
+ hf_repo: nvidia/Nemotron-Mini-8B
16
+ disk_size_gb: 16.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: false
24
+ quality:
25
+ overall: 65
26
+ coding: 60
27
+ reasoning: 58
28
+ instruction_following: 70
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 98.0
32
+ gen_tps: 54.0
33
+ memory_gb: 5.5
34
+ m4-max-128:
35
+ prompt_tps: 145.0
36
+ gen_tps: 80.0
37
+ memory_gb: 5.5
38
+ m5-max-128:
39
+ prompt_tps: 160.0
40
+ gen_tps: 88.0
41
+ memory_gb: 5.5
42
+ tags:
43
+ - balanced
44
+ - agent-ready
@@ -0,0 +1,45 @@
1
+ id: qwen3-8b
2
+ name: Qwen 3 8B
3
+ family: Qwen 3
4
+ params_b: 8.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3-8B-4bit
10
+ disk_size_gb: 4.5
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3-8B-8bit
13
+ disk_size_gb: 8.5
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3-8B
16
+ disk_size_gb: 16.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: qwen3
23
+ vision: false
24
+ quality:
25
+ overall: 64
26
+ coding: 62
27
+ reasoning: 60
28
+ instruction_following: 68
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 96.0
32
+ gen_tps: 53.0
33
+ memory_gb: 5.5
34
+ m4-max-128:
35
+ prompt_tps: 142.0
36
+ gen_tps: 78.0
37
+ memory_gb: 5.5
38
+ m5-max-128:
39
+ prompt_tps: 156.0
40
+ gen_tps: 86.0
41
+ memory_gb: 5.5
42
+ tags:
43
+ - balanced
44
+ - agent-ready
45
+ - thinking
@@ -0,0 +1,45 @@
1
+ id: qwen3.5-0.8b
2
+ name: Qwen 3.5 0.8B
3
+ family: Qwen 3.5
4
+ params_b: 0.8
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-0.8B-4bit
10
+ disk_size_gb: 0.65
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-0.8B-8bit
13
+ disk_size_gb: 0.9
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-0.8B
16
+ disk_size_gb: 1.6
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: false
24
+ quality:
25
+ overall: 42
26
+ coding: 38
27
+ reasoning: 35
28
+ instruction_following: 48
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 320.0
32
+ gen_tps: 125.0
33
+ memory_gb: 0.8
34
+ m4-max-128:
35
+ prompt_tps: 480.0
36
+ gen_tps: 185.0
37
+ memory_gb: 0.8
38
+ m5-max-128:
39
+ prompt_tps: 510.0
40
+ gen_tps: 195.0
41
+ memory_gb: 0.8
42
+ tags:
43
+ - lightweight
44
+ - fast-inference
45
+ - agent-ready
@@ -0,0 +1,46 @@
1
+ id: qwen3.5-14b
2
+ name: Qwen 3.5 14B
3
+ family: Qwen 3.5
4
+ params_b: 14.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-14B-4bit
10
+ disk_size_gb: 8.0
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-14B-8bit
13
+ disk_size_gb: 15.0
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-14B
16
+ disk_size_gb: 28.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: qwen3
23
+ vision: false
24
+ quality:
25
+ overall: 76
26
+ coding: 74
27
+ reasoning: 72
28
+ instruction_following: 80
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 55.0
32
+ gen_tps: 32.0
33
+ memory_gb: 9.5
34
+ m4-max-128:
35
+ prompt_tps: 82.0
36
+ gen_tps: 48.0
37
+ memory_gb: 9.5
38
+ m5-max-128:
39
+ prompt_tps: 90.0
40
+ gen_tps: 53.0
41
+ memory_gb: 9.5
42
+ tags:
43
+ - balanced
44
+ - agent-ready
45
+ - thinking
46
+ - quality
@@ -0,0 +1,45 @@
1
+ id: qwen3.5-32b
2
+ name: Qwen 3.5 32B
3
+ family: Qwen 3.5
4
+ params_b: 32.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-32B-4bit
10
+ disk_size_gb: 18.0
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-32B-8bit
13
+ disk_size_gb: 34.0
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-32B
16
+ disk_size_gb: 65.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: qwen3
23
+ vision: false
24
+ quality:
25
+ overall: 84
26
+ coding: 82
27
+ reasoning: 83
28
+ instruction_following: 86
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 28.0
32
+ gen_tps: 16.0
33
+ memory_gb: 20.0
34
+ m4-max-128:
35
+ prompt_tps: 42.0
36
+ gen_tps: 24.0
37
+ memory_gb: 20.0
38
+ m5-max-128:
39
+ prompt_tps: 46.0
40
+ gen_tps: 27.0
41
+ memory_gb: 20.0
42
+ tags:
43
+ - quality
44
+ - agent-ready
45
+ - thinking
@@ -0,0 +1,44 @@
1
+ id: qwen3.5-3b
2
+ name: Qwen 3.5 3B
3
+ family: Qwen 3.5
4
+ params_b: 3.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-3B-4bit
10
+ disk_size_gb: 1.8
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-3B-8bit
13
+ disk_size_gb: 3.4
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-3B
16
+ disk_size_gb: 6.5
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: false
22
+ reasoning_parser: null
23
+ vision: false
24
+ quality:
25
+ overall: 55
26
+ coding: 50
27
+ reasoning: 48
28
+ instruction_following: 60
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 180.0
32
+ gen_tps: 88.0
33
+ memory_gb: 2.5
34
+ m4-max-128:
35
+ prompt_tps: 260.0
36
+ gen_tps: 130.0
37
+ memory_gb: 2.5
38
+ m5-max-128:
39
+ prompt_tps: 280.0
40
+ gen_tps: 140.0
41
+ memory_gb: 2.5
42
+ tags:
43
+ - fast-inference
44
+ - agent-ready
@@ -0,0 +1,42 @@
1
+ id: qwen3.5-72b
2
+ name: Qwen 3.5 72B
3
+ family: Qwen 3.5
4
+ params_b: 72.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-72B-4bit
10
+ disk_size_gb: 40.0
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-72B-8bit
13
+ disk_size_gb: 75.0
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-72B
16
+ disk_size_gb: 145.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: qwen3
23
+ vision: false
24
+ quality:
25
+ overall: 91
26
+ coding: 90
27
+ reasoning: 92
28
+ instruction_following: 92
29
+ benchmarks:
30
+ m4-max-128:
31
+ prompt_tps: 18.0
32
+ gen_tps: 11.0
33
+ memory_gb: 42.0
34
+ m5-max-128:
35
+ prompt_tps: 20.0
36
+ gen_tps: 12.5
37
+ memory_gb: 42.0
38
+ tags:
39
+ - premium
40
+ - quality
41
+ - thinking
42
+ - agent-ready
@@ -0,0 +1,45 @@
1
+ id: qwen3.5-8b
2
+ name: Qwen 3.5 8B
3
+ family: Qwen 3.5
4
+ params_b: 8.0
5
+ architecture: transformer
6
+ min_mlx_lm_version: "0.22.0"
7
+ sources:
8
+ int4:
9
+ hf_repo: mlx-community/Qwen3.5-8B-4bit
10
+ disk_size_gb: 4.5
11
+ int8:
12
+ hf_repo: mlx-community/Qwen3.5-8B-8bit
13
+ disk_size_gb: 8.5
14
+ bf16:
15
+ hf_repo: Qwen/Qwen3.5-8B
16
+ disk_size_gb: 16.0
17
+ convert_from: true
18
+ capabilities:
19
+ tool_calling: true
20
+ tool_call_parser: hermes
21
+ thinking: true
22
+ reasoning_parser: qwen3
23
+ vision: false
24
+ quality:
25
+ overall: 68
26
+ coding: 65
27
+ reasoning: 62
28
+ instruction_following: 72
29
+ benchmarks:
30
+ m4-pro-48:
31
+ prompt_tps: 95.0
32
+ gen_tps: 52.0
33
+ memory_gb: 5.5
34
+ m4-max-128:
35
+ prompt_tps: 140.0
36
+ gen_tps: 77.0
37
+ memory_gb: 5.5
38
+ m5-max-128:
39
+ prompt_tps: 155.0
40
+ gen_tps: 85.0
41
+ memory_gb: 5.5
42
+ tags:
43
+ - balanced
44
+ - agent-ready
45
+ - thinking
mlx_stack/py.typed ADDED
@@ -0,0 +1 @@
1
+
@@ -0,0 +1 @@
1
+ """Utility modules for mlx-stack."""