lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0

lemonade_server/pydantic_models.py

@@ -1,4 +1,5 @@
 import os
+import platform
 from typing import Optional, Union, List

 from pydantic import BaseModel
@@ -6,7 +7,28 @@ from pydantic import BaseModel
 DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
 DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
 DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
-DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
+
+
+# Platform-aware default backend selection
+def _get_default_llamacpp_backend():
+    """
+    Get the default llamacpp backend based on the current platform.
+    """
+    # Allow environment variable override
+    env_backend = os.getenv("LEMONADE_LLAMACPP")
+    if env_backend:
+        return env_backend
+
+    # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
+    if platform.system() == "Darwin" and platform.machine().lower() in [
+        "arm64",
+        "aarch64",
+    ]:
+        return "metal"
+    return "vulkan"
+
+
+DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
 DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))

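One consequence of this change: the default is now computed once, at import time, so LEMONADE_LLAMACPP must be in the environment before lemonade_server.pydantic_models is imported. A minimal standalone sketch of the override behavior (a restatement of the logic above for illustration, not the packaged module):

import os
import platform


def default_backend() -> str:
    """Standalone restatement of _get_default_llamacpp_backend() above."""
    override = os.getenv("LEMONADE_LLAMACPP")
    if override:
        return override
    apple_silicon = platform.system() == "Darwin" and platform.machine().lower() in (
        "arm64",
        "aarch64",
    )
    return "metal" if apple_silicon else "vulkan"


# Without the env var, Apple Silicon resolves to "metal", everything else to "vulkan".
# An explicit value always wins ("rocm" here is a hypothetical override value):
os.environ["LEMONADE_LLAMACPP"] = "rocm"
assert default_backend() == "rocm"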
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
     recipe: Optional[str] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates whether the model is a vision model with image processing capabilities
+    vision: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use
     mmproj: Optional[str] = None

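The new vision flag slots in next to reasoning and mmproj. A hedged sketch of a LoadConfig for a vision model, using values from the Gemma-3-4b-it-GGUF catalog entry below; note the checkpoint field is an assumption inferred from PullConfig(LoadConfig) and the server_models.json schema, since it is not shown in this hunk:

from lemonade_server.pydantic_models import LoadConfig

# checkpoint/recipe/mmproj values mirror the Gemma-3-4b-it-GGUF entry in
# server_models.json; the checkpoint field itself is assumed, not shown above.
config = LoadConfig(
    checkpoint="ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
    recipe="llamacpp",
    vision=True,
    mmproj="mmproj-model-f16.gguf",
)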
@@ -46,6 +70,7 @@ class CompletionRequest(BaseModel):
     top_k: int | None = None
     top_p: float | None = None
     max_tokens: int | None = None
+    enable_thinking: bool | None = True


 class ChatCompletionRequest(BaseModel):
@@ -69,6 +94,7 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: int | None = None
     max_completion_tokens: int | None = None
     response_format: dict | None = None
+    enable_thinking: bool | None = True


 class EmbeddingsRequest(BaseModel):
@@ -108,6 +134,7 @@ class ResponsesRequest(BaseModel):
     top_k: int | None = None
     top_p: float | None = None
     stream: bool = False
+    enable_thinking: bool | None = True


 class PullConfig(LoadConfig):
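enable_thinking now defaults to True on completions, chat completions, and responses alike, so reasoning models keep emitting their thinking traces unless a client opts out per request. A sketch of opting out; the /api/v1/chat/completions route and the default port above follow Lemonade Server's OpenAI-compatible convention, but treat the exact URL as an assumption:

import requests

# Opt out of thinking traces for one request (the field defaults to True server-side).
resp = requests.post(
    "http://localhost:8000/api/v1/chat/completions",  # assumed route
    json={
        "model": "Qwen3-0.6B-GGUF",
        "messages": [{"role": "user", "content": "Summarize Vulkan in one line."}],
        "enable_thinking": False,
    },
    timeout=120,
)
print(resp.json()["choices"][0]["message"]["content"])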

lemonade_server/server_models.json

@@ -2,327 +2,481 @@
     "Qwen2.5-0.5B-Instruct-CPU": {
         "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 0.77
     },
     "Llama-3.2-1B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": false
+        "suggested": false,
+        "size": 1.64
     },
     "Llama-3.2-3B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.15
     },
     "Phi-3-Mini-Instruct-CPU": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 2.23
     },
     "Qwen-1.5-7B-Chat-CPU": {
         "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 5.89
     },
     "DeepSeek-R1-Distill-Llama-8B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.78
     },
     "DeepSeek-R1-Distill-Qwen-7B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.78
     },
     "Llama-3.2-1B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 1.89
     },
     "Llama-3.2-3B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Llama-3.2-3B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 4.28
     },
     "Phi-3-Mini-Instruct-Hybrid": {
-        "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Phi-3-mini-4k-instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 4.18
     },
     "Phi-3.5-Mini-Instruct-Hybrid": {
-        "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-hybrid",
        "recipe": "oga-hybrid",
-        "suggested": false
+        "suggested": false,
+        "size": 4.21
     },
     "Qwen-1.5-7B-Chat-Hybrid": {
-        "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen1.5-7B-Chat-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 8.83
     },
     "Qwen-2.5-7B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 8.65
     },
     "Qwen-2.5-3B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-3B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 3.97
     },
     "Qwen-2.5-1.5B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 2.16
     },
     "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 9.09
     },
     "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
         "max_prompt_length": 2000,
         "suggested": false,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 8.67
     },
     "Mistral-7B-v0.3-Instruct-Hybrid": {
-        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 7.85
     },
     "Llama-3.1-8B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/Meta-Llama-3.1-8B-Instruct-onnx-ryzenai-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "size": 9.09
+    },
+    "Qwen3-1.7B-Hybrid": {
+        "checkpoint": "amd/Qwen3-1.7B-awq-quant-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 2.55
+    },
+    "Phi-4-Mini-Instruct-Hybrid": {
+        "checkpoint": "amd/Phi-4-mini-instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 5.46
+    },
+    "Qwen3-4B-Hybrid": {
+        "checkpoint": "amd/Qwen3-4B-awq-quant-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 5.17
     },
-    "Llama-xLAM-2-8b-fc-r-Hybrid": {
-        "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
+    "Qwen3-8B-Hybrid": {
+        "checkpoint": "amd/Qwen3-8B-awq-quant-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 9.42
     },
     "Qwen-2.5-7B-Instruct-NPU": {
-        "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 8.82
     },
-    "Qwen-2.5-1.5B-Instruct-NPU": {
-        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+    "Qwen-2.5-3B-Instruct-NPU": {
+        "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 4.09
     },
     "DeepSeek-R1-Distill-Llama-8B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 9.30
     },
     "DeepSeek-R1-Distill-Qwen-7B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 8.87
     },
     "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
-    },
-    "Llama-3.2-3B-Instruct-NPU": {
-        "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
-        "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.30
     },
     "Llama-3.2-1B-Instruct-NPU": {
-        "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 1.96
     },
     "Mistral-7B-v0.3-Instruct-NPU": {
-        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 8.09
     },
     "Phi-3.5-Mini-Instruct-NPU": {
-        "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 4.35
     },
     "ChatGLM-3-6b-Instruct-NPU": {
-        "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
-        "recipe": "oga-npu",
-        "suggested": false
-    },
-    "AMD-OLMo-1B-Instruct-NPU": {
-        "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/chatglm3-6b-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 7.03
     },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.81
     },
     "Llama-3.2-3B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 6.75
     },
     "Phi-3.5-Mini-Instruct-DirectML": {
         "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.14
     },
     "Qwen-1.5-7B-Chat-DirectML": {
         "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.73
     },
     "Mistral-7B-v0.1-Instruct-DirectML": {
         "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.67
     },
     "Llama-3-8B-Instruct-DirectML": {
         "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 4.61
     },
     "Qwen3-0.6B-GGUF": {
         "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 0.38
     },
     "Qwen3-1.7B-GGUF": {
         "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 1.06
     },
     "Qwen3-4B-GGUF": {
         "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 2.38
     },
     "Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.25
     },
     "DeepSeek-Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.25
     },
     "Qwen3-14B-GGUF": {
         "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 8.54
+    },
+    "Qwen3-4B-Instruct-2507-GGUF": {
+        "checkpoint": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["hot"],
+        "size": 2.5
     },
     "Qwen3-30B-A3B-GGUF": {
         "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 17.4
     },
     "Qwen3-30B-A3B-Instruct-2507-GGUF": {
         "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["hot"]
+        "size": 17.4
     },
     "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
         "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding","hot"]
+        "labels": ["coding","tool-calling","hot"],
+        "size": 18.6
     },
     "Gemma-3-4b-it-GGUF": {
         "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
         "mmproj": "mmproj-model-f16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["hot","vision"],
+        "size": 3.61
     },
     "Qwen2.5-VL-7B-Instruct-GGUF": {
         "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
         "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 4.68
     },
     "Llama-4-Scout-17B-16E-Instruct-GGUF": {
         "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
         "mmproj": "mmproj-F16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 61.5
     },
     "Cogito-v2-llama-109B-MoE-GGUF": {
         "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
         "mmproj": "mmproj-F16.gguf",
         "recipe": "llamacpp",
         "suggested": false,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 65.3
     },
     "nomic-embed-text-v1-GGUF": {
         "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["embeddings"]
+        "labels": ["embeddings"],
+        "size": 0.0781
     },
     "nomic-embed-text-v2-moe-GGUF": {
         "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["embeddings"]
+        "labels": ["embeddings"],
+        "size": 0.51
     },
     "bge-reranker-v2-m3-GGUF": {
         "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reranking"]
+        "labels": ["reranking"],
+        "size": 0.53
     },
     "jina-reranker-v1-tiny-en-GGUF": {
         "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
         "recipe": "llamacpp",
         "suggested": false,
-        "labels": ["reranking"]
+        "labels": ["reranking"],
+        "size": 0.03
     },
     "Devstral-Small-2507-GGUF":{
         "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding"]
+        "labels": ["coding","tool-calling"],
+        "size": 14.3
     },
     "Qwen2.5-Coder-32B-Instruct-GGUF": {
         "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding"]
+        "labels": ["coding"],
+        "size": 19.85
     },
     "gpt-oss-120b-GGUF": {
         "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
         "recipe": "llamacpp",
-        "suggested": true,
-        "labels": ["hot", "reasoning"]
+        "suggested": false,
+        "labels": ["reasoning", "tool-calling"],
+        "size": 62.7
     },
     "gpt-oss-20b-GGUF": {
         "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
         "recipe": "llamacpp",
+        "suggested": false,
+        "labels": ["reasoning", "tool-calling"],
+        "size": 11.6
+    },
+    "gpt-oss-120b-mxfp-GGUF": {
+        "checkpoint": "ggml-org/gpt-oss-120b-GGUF:*",
+        "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["hot", "reasoning"]
+        "labels": ["hot", "reasoning", "tool-calling"],
+        "size": 63.3
+    },
+    "gpt-oss-20b-mxfp4-GGUF": {
+        "checkpoint": "ggml-org/gpt-oss-20b-GGUF",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["hot", "reasoning", "tool-calling"],
+        "size": 12.1
     },
     "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
         "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning","hot"]
+        "labels": ["reasoning"],
+        "size": 73.1
+    },
+    "Playable1-GGUF": {
+        "checkpoint": "playable/Playable1-GGUF:Playable1-q4_k_m.gguf",
+        "recipe": "llamacpp",
+        "suggested": false,
+        "labels": ["coding"],
+        "size": 4.68
+    },
+    "gpt-oss-20b-FLM": {
+        "checkpoint": "gpt-oss:20b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 13.4
+    },
+    "Gemma3-1b-it-FLM": {
+        "checkpoint": "gemma3:1b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 1.17
+    },
+    "Gemma3-4b-it-FLM": {
+        "checkpoint": "gemma3:4b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["hot","vision"],
+        "size": 5.26
+    },
+    "Qwen3-0.6b-FLM": {
+        "checkpoint": "qwen3:0.6b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 0.66
+    },
+    "Qwen3-4B-Instruct-2507-FLM": {
+        "checkpoint": "qwen3-it:4b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["hot"],
+        "size": 3.07
+    },
+    "Qwen3-8b-FLM": {
+        "checkpoint": "qwen3:8b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 5.57
+    },
+    "Llama-3.1-8B-FLM": {
+        "checkpoint": "llama3.1:8b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 5.36
+    },
+    "Llama-3.2-1B-FLM": {
+        "checkpoint": "llama3.2:1b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 1.21
+    },
+    "Llama-3.2-3B-FLM": {
+        "checkpoint": "llama3.2:3b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 2.62
     }
 }
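Every catalog entry now carries a size field (download size, apparently in GB) alongside the existing checkpoint/recipe/suggested/labels keys, and a new "flm" recipe appears for checkpoints like gpt-oss:20b. A small sketch of consuming the updated schema, assuming a local copy of the file:

import json

# server_models.json ships inside the wheel under lemonade_server/;
# the local path used here is illustrative.
with open("server_models.json", encoding="utf-8") as f:
    catalog = json.load(f)

# Print the suggested models for the new "flm" recipe with their sizes in GB.
for name, entry in catalog.items():
    if entry.get("suggested") and entry.get("recipe") == "flm":
        print(f"{name}: {entry['size']} GB, checkpoint {entry['checkpoint']}")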