mlx-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlx_stack/__init__.py +5 -0
- mlx_stack/_version.py +24 -0
- mlx_stack/cli/__init__.py +5 -0
- mlx_stack/cli/bench.py +221 -0
- mlx_stack/cli/config.py +166 -0
- mlx_stack/cli/down.py +109 -0
- mlx_stack/cli/init.py +180 -0
- mlx_stack/cli/install.py +165 -0
- mlx_stack/cli/logs.py +234 -0
- mlx_stack/cli/main.py +187 -0
- mlx_stack/cli/models.py +304 -0
- mlx_stack/cli/profile.py +65 -0
- mlx_stack/cli/pull.py +134 -0
- mlx_stack/cli/recommend.py +397 -0
- mlx_stack/cli/status.py +111 -0
- mlx_stack/cli/up.py +163 -0
- mlx_stack/cli/watch.py +252 -0
- mlx_stack/core/__init__.py +1 -0
- mlx_stack/core/benchmark.py +1182 -0
- mlx_stack/core/catalog.py +560 -0
- mlx_stack/core/config.py +471 -0
- mlx_stack/core/deps.py +323 -0
- mlx_stack/core/hardware.py +304 -0
- mlx_stack/core/launchd.py +531 -0
- mlx_stack/core/litellm_gen.py +188 -0
- mlx_stack/core/log_rotation.py +231 -0
- mlx_stack/core/log_viewer.py +386 -0
- mlx_stack/core/models.py +639 -0
- mlx_stack/core/paths.py +79 -0
- mlx_stack/core/process.py +887 -0
- mlx_stack/core/pull.py +815 -0
- mlx_stack/core/scoring.py +611 -0
- mlx_stack/core/stack_down.py +317 -0
- mlx_stack/core/stack_init.py +524 -0
- mlx_stack/core/stack_status.py +229 -0
- mlx_stack/core/stack_up.py +856 -0
- mlx_stack/core/watchdog.py +744 -0
- mlx_stack/data/__init__.py +1 -0
- mlx_stack/data/catalog/__init__.py +1 -0
- mlx_stack/data/catalog/deepseek-r1-32b.yaml +46 -0
- mlx_stack/data/catalog/deepseek-r1-8b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-12b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-27b.yaml +45 -0
- mlx_stack/data/catalog/gemma3-4b.yaml +45 -0
- mlx_stack/data/catalog/llama3.3-8b.yaml +44 -0
- mlx_stack/data/catalog/nemotron-49b.yaml +41 -0
- mlx_stack/data/catalog/nemotron-8b.yaml +44 -0
- mlx_stack/data/catalog/qwen3-8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-0.8b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-14b.yaml +46 -0
- mlx_stack/data/catalog/qwen3.5-32b.yaml +45 -0
- mlx_stack/data/catalog/qwen3.5-3b.yaml +44 -0
- mlx_stack/data/catalog/qwen3.5-72b.yaml +42 -0
- mlx_stack/data/catalog/qwen3.5-8b.yaml +45 -0
- mlx_stack/py.typed +1 -0
- mlx_stack/utils/__init__.py +1 -0
- mlx_stack-0.1.0.dist-info/METADATA +397 -0
- mlx_stack-0.1.0.dist-info/RECORD +61 -0
- mlx_stack-0.1.0.dist-info/WHEEL +4 -0
- mlx_stack-0.1.0.dist-info/entry_points.txt +2 -0
- mlx_stack-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Static data files for mlx-stack."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Model catalog YAML data files."""
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
id: deepseek-r1-32b
|
|
2
|
+
name: DeepSeek R1 32B
|
|
3
|
+
family: DeepSeek R1
|
|
4
|
+
params_b: 32.0
|
|
5
|
+
architecture: mamba2-hybrid
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-32B-4bit
|
|
10
|
+
disk_size_gb: 18.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-32B-8bit
|
|
13
|
+
disk_size_gb: 34.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: deepseek-ai/DeepSeek-R1-0528-Qwen3-32B
|
|
16
|
+
disk_size_gb: 65.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: false
|
|
20
|
+
tool_call_parser: null
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: deepseek_r1
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 85
|
|
26
|
+
coding: 86
|
|
27
|
+
reasoning: 90
|
|
28
|
+
instruction_following: 80
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 26.0
|
|
32
|
+
gen_tps: 15.0
|
|
33
|
+
memory_gb: 20.0
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 40.0
|
|
36
|
+
gen_tps: 23.0
|
|
37
|
+
memory_gb: 20.0
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 44.0
|
|
40
|
+
gen_tps: 25.0
|
|
41
|
+
memory_gb: 20.0
|
|
42
|
+
tags:
|
|
43
|
+
- reasoning
|
|
44
|
+
- thinking
|
|
45
|
+
- quality
|
|
46
|
+
- long-context
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: deepseek-r1-8b
|
|
2
|
+
name: DeepSeek R1 8B
|
|
3
|
+
family: DeepSeek R1
|
|
4
|
+
params_b: 8.0
|
|
5
|
+
architecture: mamba2-hybrid
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-8B-4bit
|
|
10
|
+
disk_size_gb: 4.5
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/DeepSeek-R1-0528-Qwen3-8B-8bit
|
|
13
|
+
disk_size_gb: 8.5
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
|
|
16
|
+
disk_size_gb: 16.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: false
|
|
20
|
+
tool_call_parser: null
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: deepseek_r1
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 70
|
|
26
|
+
coding: 72
|
|
27
|
+
reasoning: 75
|
|
28
|
+
instruction_following: 64
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 90.0
|
|
32
|
+
gen_tps: 50.0
|
|
33
|
+
memory_gb: 5.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 135.0
|
|
36
|
+
gen_tps: 74.0
|
|
37
|
+
memory_gb: 5.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 148.0
|
|
40
|
+
gen_tps: 82.0
|
|
41
|
+
memory_gb: 5.5
|
|
42
|
+
tags:
|
|
43
|
+
- reasoning
|
|
44
|
+
- thinking
|
|
45
|
+
- long-context
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: gemma3-12b
|
|
2
|
+
name: Gemma 3 12B
|
|
3
|
+
family: Gemma 3
|
|
4
|
+
params_b: 12.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/gemma-3-12b-it-4bit
|
|
10
|
+
disk_size_gb: 7.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/gemma-3-12b-it-8bit
|
|
13
|
+
disk_size_gb: 13.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: google/gemma-3-12b-it
|
|
16
|
+
disk_size_gb: 25.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: false
|
|
20
|
+
tool_call_parser: null
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: true
|
|
24
|
+
quality:
|
|
25
|
+
overall: 72
|
|
26
|
+
coding: 68
|
|
27
|
+
reasoning: 70
|
|
28
|
+
instruction_following: 75
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 62.0
|
|
32
|
+
gen_tps: 35.0
|
|
33
|
+
memory_gb: 8.2
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 90.0
|
|
36
|
+
gen_tps: 52.0
|
|
37
|
+
memory_gb: 8.2
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 100.0
|
|
40
|
+
gen_tps: 57.0
|
|
41
|
+
memory_gb: 8.2
|
|
42
|
+
tags:
|
|
43
|
+
- vision
|
|
44
|
+
- balanced
|
|
45
|
+
- multimodal
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: gemma3-27b
|
|
2
|
+
name: Gemma 3 27B
|
|
3
|
+
family: Gemma 3
|
|
4
|
+
params_b: 27.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/gemma-3-27b-it-4bit
|
|
10
|
+
disk_size_gb: 15.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/gemma-3-27b-it-8bit
|
|
13
|
+
disk_size_gb: 28.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: google/gemma-3-27b-it
|
|
16
|
+
disk_size_gb: 54.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: false
|
|
20
|
+
tool_call_parser: null
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: true
|
|
24
|
+
quality:
|
|
25
|
+
overall: 80
|
|
26
|
+
coding: 76
|
|
27
|
+
reasoning: 78
|
|
28
|
+
instruction_following: 83
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 30.0
|
|
32
|
+
gen_tps: 18.0
|
|
33
|
+
memory_gb: 17.0
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 45.0
|
|
36
|
+
gen_tps: 26.0
|
|
37
|
+
memory_gb: 17.0
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 50.0
|
|
40
|
+
gen_tps: 29.0
|
|
41
|
+
memory_gb: 17.0
|
|
42
|
+
tags:
|
|
43
|
+
- vision
|
|
44
|
+
- quality
|
|
45
|
+
- multimodal
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: gemma3-4b
|
|
2
|
+
name: Gemma 3 4B
|
|
3
|
+
family: Gemma 3
|
|
4
|
+
params_b: 4.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/gemma-3-4b-it-4bit
|
|
10
|
+
disk_size_gb: 2.3
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/gemma-3-4b-it-8bit
|
|
13
|
+
disk_size_gb: 4.4
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: google/gemma-3-4b-it
|
|
16
|
+
disk_size_gb: 8.5
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: false
|
|
20
|
+
tool_call_parser: null
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: true
|
|
24
|
+
quality:
|
|
25
|
+
overall: 52
|
|
26
|
+
coding: 45
|
|
27
|
+
reasoning: 48
|
|
28
|
+
instruction_following: 58
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 160.0
|
|
32
|
+
gen_tps: 78.0
|
|
33
|
+
memory_gb: 3.2
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 230.0
|
|
36
|
+
gen_tps: 115.0
|
|
37
|
+
memory_gb: 3.2
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 250.0
|
|
40
|
+
gen_tps: 125.0
|
|
41
|
+
memory_gb: 3.2
|
|
42
|
+
tags:
|
|
43
|
+
- vision
|
|
44
|
+
- fast-inference
|
|
45
|
+
- multimodal
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
id: llama3.3-8b
|
|
2
|
+
name: Llama 3.3 8B
|
|
3
|
+
family: Llama 3.3
|
|
4
|
+
params_b: 8.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Llama-3.3-8B-Instruct-4bit
|
|
10
|
+
disk_size_gb: 4.5
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Llama-3.3-8B-Instruct-8bit
|
|
13
|
+
disk_size_gb: 8.5
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: meta-llama/Llama-3.3-8B-Instruct
|
|
16
|
+
disk_size_gb: 16.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: llama3_json
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 63
|
|
26
|
+
coding: 60
|
|
27
|
+
reasoning: 58
|
|
28
|
+
instruction_following: 67
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 100.0
|
|
32
|
+
gen_tps: 55.0
|
|
33
|
+
memory_gb: 5.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 148.0
|
|
36
|
+
gen_tps: 82.0
|
|
37
|
+
memory_gb: 5.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 162.0
|
|
40
|
+
gen_tps: 90.0
|
|
41
|
+
memory_gb: 5.5
|
|
42
|
+
tags:
|
|
43
|
+
- balanced
|
|
44
|
+
- agent-ready
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
id: nemotron-49b
|
|
2
|
+
name: Nemotron 49B
|
|
3
|
+
family: Nemotron
|
|
4
|
+
params_b: 49.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Nemotron-Ultra-49B-4bit
|
|
10
|
+
disk_size_gb: 27.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Nemotron-Ultra-49B-8bit
|
|
13
|
+
disk_size_gb: 52.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: nvidia/Nemotron-Ultra-49B
|
|
16
|
+
disk_size_gb: 100.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: nemotron
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 87
|
|
26
|
+
coding: 85
|
|
27
|
+
reasoning: 88
|
|
28
|
+
instruction_following: 88
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-max-128:
|
|
31
|
+
prompt_tps: 22.0
|
|
32
|
+
gen_tps: 13.0
|
|
33
|
+
memory_gb: 30.0
|
|
34
|
+
m5-max-128:
|
|
35
|
+
prompt_tps: 25.0
|
|
36
|
+
gen_tps: 15.0
|
|
37
|
+
memory_gb: 30.0
|
|
38
|
+
tags:
|
|
39
|
+
- quality
|
|
40
|
+
- thinking
|
|
41
|
+
- agent-ready
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
id: nemotron-8b
|
|
2
|
+
name: Nemotron 8B
|
|
3
|
+
family: Nemotron
|
|
4
|
+
params_b: 8.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Nemotron-Mini-8B-4bit
|
|
10
|
+
disk_size_gb: 4.5
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Nemotron-Mini-8B-8bit
|
|
13
|
+
disk_size_gb: 8.5
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: nvidia/Nemotron-Mini-8B
|
|
16
|
+
disk_size_gb: 16.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 65
|
|
26
|
+
coding: 60
|
|
27
|
+
reasoning: 58
|
|
28
|
+
instruction_following: 70
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 98.0
|
|
32
|
+
gen_tps: 54.0
|
|
33
|
+
memory_gb: 5.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 145.0
|
|
36
|
+
gen_tps: 80.0
|
|
37
|
+
memory_gb: 5.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 160.0
|
|
40
|
+
gen_tps: 88.0
|
|
41
|
+
memory_gb: 5.5
|
|
42
|
+
tags:
|
|
43
|
+
- balanced
|
|
44
|
+
- agent-ready
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: qwen3-8b
|
|
2
|
+
name: Qwen 3 8B
|
|
3
|
+
family: Qwen 3
|
|
4
|
+
params_b: 8.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3-8B-4bit
|
|
10
|
+
disk_size_gb: 4.5
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3-8B-8bit
|
|
13
|
+
disk_size_gb: 8.5
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3-8B
|
|
16
|
+
disk_size_gb: 16.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: qwen3
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 64
|
|
26
|
+
coding: 62
|
|
27
|
+
reasoning: 60
|
|
28
|
+
instruction_following: 68
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 96.0
|
|
32
|
+
gen_tps: 53.0
|
|
33
|
+
memory_gb: 5.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 142.0
|
|
36
|
+
gen_tps: 78.0
|
|
37
|
+
memory_gb: 5.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 156.0
|
|
40
|
+
gen_tps: 86.0
|
|
41
|
+
memory_gb: 5.5
|
|
42
|
+
tags:
|
|
43
|
+
- balanced
|
|
44
|
+
- agent-ready
|
|
45
|
+
- thinking
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: qwen3.5-0.8b
|
|
2
|
+
name: Qwen 3.5 0.8B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 0.8
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-0.8B-4bit
|
|
10
|
+
disk_size_gb: 0.65
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-0.8B-8bit
|
|
13
|
+
disk_size_gb: 0.9
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-0.8B
|
|
16
|
+
disk_size_gb: 1.6
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 42
|
|
26
|
+
coding: 38
|
|
27
|
+
reasoning: 35
|
|
28
|
+
instruction_following: 48
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 320.0
|
|
32
|
+
gen_tps: 125.0
|
|
33
|
+
memory_gb: 0.8
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 480.0
|
|
36
|
+
gen_tps: 185.0
|
|
37
|
+
memory_gb: 0.8
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 510.0
|
|
40
|
+
gen_tps: 195.0
|
|
41
|
+
memory_gb: 0.8
|
|
42
|
+
tags:
|
|
43
|
+
- lightweight
|
|
44
|
+
- fast-inference
|
|
45
|
+
- agent-ready
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
id: qwen3.5-14b
|
|
2
|
+
name: Qwen 3.5 14B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 14.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-14B-4bit
|
|
10
|
+
disk_size_gb: 8.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-14B-8bit
|
|
13
|
+
disk_size_gb: 15.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-14B
|
|
16
|
+
disk_size_gb: 28.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: qwen3
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 76
|
|
26
|
+
coding: 74
|
|
27
|
+
reasoning: 72
|
|
28
|
+
instruction_following: 80
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 55.0
|
|
32
|
+
gen_tps: 32.0
|
|
33
|
+
memory_gb: 9.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 82.0
|
|
36
|
+
gen_tps: 48.0
|
|
37
|
+
memory_gb: 9.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 90.0
|
|
40
|
+
gen_tps: 53.0
|
|
41
|
+
memory_gb: 9.5
|
|
42
|
+
tags:
|
|
43
|
+
- balanced
|
|
44
|
+
- agent-ready
|
|
45
|
+
- thinking
|
|
46
|
+
- quality
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: qwen3.5-32b
|
|
2
|
+
name: Qwen 3.5 32B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 32.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-32B-4bit
|
|
10
|
+
disk_size_gb: 18.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-32B-8bit
|
|
13
|
+
disk_size_gb: 34.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-32B
|
|
16
|
+
disk_size_gb: 65.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: qwen3
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 84
|
|
26
|
+
coding: 82
|
|
27
|
+
reasoning: 83
|
|
28
|
+
instruction_following: 86
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 28.0
|
|
32
|
+
gen_tps: 16.0
|
|
33
|
+
memory_gb: 20.0
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 42.0
|
|
36
|
+
gen_tps: 24.0
|
|
37
|
+
memory_gb: 20.0
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 46.0
|
|
40
|
+
gen_tps: 27.0
|
|
41
|
+
memory_gb: 20.0
|
|
42
|
+
tags:
|
|
43
|
+
- quality
|
|
44
|
+
- agent-ready
|
|
45
|
+
- thinking
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
id: qwen3.5-3b
|
|
2
|
+
name: Qwen 3.5 3B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 3.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-3B-4bit
|
|
10
|
+
disk_size_gb: 1.8
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-3B-8bit
|
|
13
|
+
disk_size_gb: 3.4
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-3B
|
|
16
|
+
disk_size_gb: 6.5
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: false
|
|
22
|
+
reasoning_parser: null
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 55
|
|
26
|
+
coding: 50
|
|
27
|
+
reasoning: 48
|
|
28
|
+
instruction_following: 60
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 180.0
|
|
32
|
+
gen_tps: 88.0
|
|
33
|
+
memory_gb: 2.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 260.0
|
|
36
|
+
gen_tps: 130.0
|
|
37
|
+
memory_gb: 2.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 280.0
|
|
40
|
+
gen_tps: 140.0
|
|
41
|
+
memory_gb: 2.5
|
|
42
|
+
tags:
|
|
43
|
+
- fast-inference
|
|
44
|
+
- agent-ready
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
id: qwen3.5-72b
|
|
2
|
+
name: Qwen 3.5 72B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 72.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-72B-4bit
|
|
10
|
+
disk_size_gb: 40.0
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-72B-8bit
|
|
13
|
+
disk_size_gb: 75.0
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-72B
|
|
16
|
+
disk_size_gb: 145.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: qwen3
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 91
|
|
26
|
+
coding: 90
|
|
27
|
+
reasoning: 92
|
|
28
|
+
instruction_following: 92
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-max-128:
|
|
31
|
+
prompt_tps: 18.0
|
|
32
|
+
gen_tps: 11.0
|
|
33
|
+
memory_gb: 42.0
|
|
34
|
+
m5-max-128:
|
|
35
|
+
prompt_tps: 20.0
|
|
36
|
+
gen_tps: 12.5
|
|
37
|
+
memory_gb: 42.0
|
|
38
|
+
tags:
|
|
39
|
+
- premium
|
|
40
|
+
- quality
|
|
41
|
+
- thinking
|
|
42
|
+
- agent-ready
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
id: qwen3.5-8b
|
|
2
|
+
name: Qwen 3.5 8B
|
|
3
|
+
family: Qwen 3.5
|
|
4
|
+
params_b: 8.0
|
|
5
|
+
architecture: transformer
|
|
6
|
+
min_mlx_lm_version: "0.22.0"
|
|
7
|
+
sources:
|
|
8
|
+
int4:
|
|
9
|
+
hf_repo: mlx-community/Qwen3.5-8B-4bit
|
|
10
|
+
disk_size_gb: 4.5
|
|
11
|
+
int8:
|
|
12
|
+
hf_repo: mlx-community/Qwen3.5-8B-8bit
|
|
13
|
+
disk_size_gb: 8.5
|
|
14
|
+
bf16:
|
|
15
|
+
hf_repo: Qwen/Qwen3.5-8B
|
|
16
|
+
disk_size_gb: 16.0
|
|
17
|
+
convert_from: true
|
|
18
|
+
capabilities:
|
|
19
|
+
tool_calling: true
|
|
20
|
+
tool_call_parser: hermes
|
|
21
|
+
thinking: true
|
|
22
|
+
reasoning_parser: qwen3
|
|
23
|
+
vision: false
|
|
24
|
+
quality:
|
|
25
|
+
overall: 68
|
|
26
|
+
coding: 65
|
|
27
|
+
reasoning: 62
|
|
28
|
+
instruction_following: 72
|
|
29
|
+
benchmarks:
|
|
30
|
+
m4-pro-48:
|
|
31
|
+
prompt_tps: 95.0
|
|
32
|
+
gen_tps: 52.0
|
|
33
|
+
memory_gb: 5.5
|
|
34
|
+
m4-max-128:
|
|
35
|
+
prompt_tps: 140.0
|
|
36
|
+
gen_tps: 77.0
|
|
37
|
+
memory_gb: 5.5
|
|
38
|
+
m5-max-128:
|
|
39
|
+
prompt_tps: 155.0
|
|
40
|
+
gen_tps: 85.0
|
|
41
|
+
memory_gb: 5.5
|
|
42
|
+
tags:
|
|
43
|
+
- balanced
|
|
44
|
+
- agent-ready
|
|
45
|
+
- thinking
|
mlx_stack/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility modules for mlx-stack."""
|