lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk has been flagged as possibly problematic by the registry scanner.
Files changed (33)
  1. lemonade/common/inference_engines.py +13 -4
  2. lemonade/common/system_info.py +570 -1
  3. lemonade/tools/flm/__init__.py +1 -0
  4. lemonade/tools/flm/utils.py +255 -0
  5. lemonade/tools/llamacpp/utils.py +62 -13
  6. lemonade/tools/server/flm.py +137 -0
  7. lemonade/tools/server/llamacpp.py +23 -5
  8. lemonade/tools/server/serve.py +292 -135
  9. lemonade/tools/server/static/js/chat.js +165 -82
  10. lemonade/tools/server/static/js/models.js +87 -54
  11. lemonade/tools/server/static/js/shared.js +5 -3
  12. lemonade/tools/server/static/logs.html +47 -0
  13. lemonade/tools/server/static/styles.css +159 -8
  14. lemonade/tools/server/static/webapp.html +28 -10
  15. lemonade/tools/server/tray.py +158 -38
  16. lemonade/tools/server/utils/macos_tray.py +226 -0
  17. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  18. lemonade/tools/server/webapp.py +4 -1
  19. lemonade/tools/server/wrapped_server.py +91 -25
  20. lemonade/version.py +1 -1
  21. lemonade_install/install.py +25 -2
  22. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
  23. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +33 -28
  24. lemonade_server/cli.py +105 -14
  25. lemonade_server/model_manager.py +186 -45
  26. lemonade_server/pydantic_models.py +25 -1
  27. lemonade_server/server_models.json +162 -62
  28. lemonade_server/settings.py +39 -39
  29. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
  30. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
  31. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
  32. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
  33. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
lemonade_server/server_models.json
@@ -2,345 +2,445 @@
  "Qwen2.5-0.5B-Instruct-CPU": {
  "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
  "recipe": "oga-cpu",
- "suggested": true
+ "suggested": true,
+ "size": 0.77
  },
  "Llama-3.2-1B-Instruct-CPU": {
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
  "recipe": "oga-cpu",
- "suggested": false
+ "suggested": false,
+ "size": 1.64
  },
  "Llama-3.2-3B-Instruct-CPU": {
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
  "recipe": "oga-cpu",
- "suggested": false
+ "suggested": false,
+ "size": 3.15
  },
  "Phi-3-Mini-Instruct-CPU": {
  "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
  "recipe": "oga-cpu",
- "suggested": true
+ "suggested": true,
+ "size": 2.23
  },
  "Qwen-1.5-7B-Chat-CPU": {
  "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
  "recipe": "oga-cpu",
- "suggested": true
+ "suggested": true,
+ "size": 5.89
  },
  "DeepSeek-R1-Distill-Llama-8B-CPU": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
  "recipe": "oga-cpu",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 5.78
  },
  "DeepSeek-R1-Distill-Qwen-7B-CPU": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
  "recipe": "oga-cpu",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 5.78
  },
  "Llama-3.2-1B-Instruct-Hybrid": {
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 1.75
  },
  "Llama-3.2-3B-Instruct-Hybrid": {
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 3.97
  },
  "Phi-3-Mini-Instruct-Hybrid": {
  "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 3.89
  },
  "Phi-3.5-Mini-Instruct-Hybrid": {
  "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": false
+ "suggested": false,
+ "size": 3.92
  },
  "Qwen-1.5-7B-Chat-Hybrid": {
  "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 8.22
  },
  "Qwen-2.5-7B-Instruct-Hybrid": {
  "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 8.42
  },
  "Qwen-2.5-3B-Instruct-Hybrid": {
  "checkpoint": "amd/Qwen2.5-3B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 3.84
  },
  "Qwen-2.5-1.5B-Instruct-Hybrid": {
  "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 2.08
  },
  "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
  "recipe": "oga-hybrid",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 8.45
  },
  "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
  "recipe": "oga-hybrid",
  "max_prompt_length": 2000,
  "suggested": false,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 8.84
  },
  "Mistral-7B-v0.3-Instruct-Hybrid": {
  "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 7.31
  },
  "Llama-3.1-8B-Instruct-Hybrid": {
  "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 8.47
  },
  "Llama-xLAM-2-8b-fc-r-Hybrid": {
  "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
  "recipe": "oga-hybrid",
- "suggested": true
+ "suggested": true,
+ "size": 8.47
  },
  "Qwen-2.5-7B-Instruct-NPU": {
  "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": true
+ "suggested": true,
+ "size": 10.14
  },
  "Qwen-2.5-1.5B-Instruct-NPU": {
  "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": true
+ "suggested": true,
+ "size": 2.89
  },
  "DeepSeek-R1-Distill-Llama-8B-NPU": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": true
+ "suggested": true,
+ "size": 10.63
  },
  "DeepSeek-R1-Distill-Qwen-7B-NPU": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 10.3
  },
  "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 3.02
  },
  "Llama-3.2-3B-Instruct-NPU": {
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 2.46
  },
  "Llama-3.2-1B-Instruct-NPU": {
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 1.18
  },
  "Mistral-7B-v0.3-Instruct-NPU": {
  "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": true
+ "suggested": true,
+ "size": 11.75
  },
  "Phi-3.5-Mini-Instruct-NPU": {
  "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": true
+ "suggested": true,
+ "size": 4.18
  },
  "ChatGLM-3-6b-Instruct-NPU": {
  "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 3.53
  },
  "AMD-OLMo-1B-Instruct-NPU": {
  "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
  "recipe": "oga-npu",
- "suggested": false
+ "suggested": false,
+ "size": 2.56
  },
  "Llama-3.2-1B-Instruct-DirectML": {
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 2.81
  },
  "Llama-3.2-3B-Instruct-DirectML": {
  "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 6.75
  },
  "Phi-3.5-Mini-Instruct-DirectML": {
  "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 2.14
  },
  "Qwen-1.5-7B-Chat-DirectML": {
  "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 3.73
  },
  "Mistral-7B-v0.1-Instruct-DirectML": {
  "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 3.67
  },
  "Llama-3-8B-Instruct-DirectML": {
  "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
  "recipe": "oga-igpu",
- "suggested": false
+ "suggested": false,
+ "size": 4.61
  },
  "Qwen3-0.6B-GGUF": {
  "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 0.38
  },
  "Qwen3-1.7B-GGUF": {
  "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 1.06
  },
  "Qwen3-4B-GGUF": {
  "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 2.38
  },
  "Qwen3-8B-GGUF": {
  "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 5.25
  },
  "DeepSeek-Qwen3-8B-GGUF": {
  "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 5.25
  },
  "Qwen3-14B-GGUF": {
  "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 8.54
  },
  "Qwen3-4B-Instruct-2507-GGUF": {
  "checkpoint": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["hot"]
+ "labels": ["hot"],
+ "size": 2.5
  },
  "Qwen3-30B-A3B-GGUF": {
  "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning"]
+ "labels": ["reasoning"],
+ "size": 17.4
  },
  "Qwen3-30B-A3B-Instruct-2507-GGUF": {
  "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["hot"]
+ "size": 17.4
  },
  "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
  "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["coding","tool-calling"]
+ "labels": ["coding","tool-calling","hot"],
+ "size": 18.6
  },
  "Gemma-3-4b-it-GGUF": {
  "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
  "mmproj": "mmproj-model-f16.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["hot","vision"]
+ "labels": ["hot","vision"],
+ "size": 3.61
  },
  "Qwen2.5-VL-7B-Instruct-GGUF": {
  "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
  "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["vision"]
+ "labels": ["vision"],
+ "size": 4.68
  },
  "Llama-4-Scout-17B-16E-Instruct-GGUF": {
  "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
  "mmproj": "mmproj-F16.gguf",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["vision"]
+ "labels": ["vision"],
+ "size": 61.5
  },
  "Cogito-v2-llama-109B-MoE-GGUF": {
  "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
  "mmproj": "mmproj-F16.gguf",
  "recipe": "llamacpp",
  "suggested": false,
- "labels": ["vision"]
+ "labels": ["vision"],
+ "size": 65.3
  },
  "nomic-embed-text-v1-GGUF": {
  "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["embeddings"]
+ "labels": ["embeddings"],
+ "size": 0.0781
  },
  "nomic-embed-text-v2-moe-GGUF": {
  "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["embeddings"]
+ "labels": ["embeddings"],
+ "size": 0.51
  },
  "bge-reranker-v2-m3-GGUF": {
  "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reranking"]
+ "labels": ["reranking"],
+ "size": 0.53
  },
  "jina-reranker-v1-tiny-en-GGUF": {
  "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
  "recipe": "llamacpp",
  "suggested": false,
- "labels": ["reranking"]
+ "labels": ["reranking"],
+ "size": 0.03
  },
  "Devstral-Small-2507-GGUF":{
  "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["coding","tool-calling"]
+ "labels": ["coding","tool-calling"],
+ "size": 14.3
  },
  "Qwen2.5-Coder-32B-Instruct-GGUF": {
  "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["coding"]
+ "labels": ["coding"],
+ "size": 19.85
  },
  "gpt-oss-120b-GGUF": {
  "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
  "recipe": "llamacpp",
  "suggested": false,
- "labels": ["reasoning", "tool-calling"]
+ "labels": ["reasoning", "tool-calling"],
+ "size": 62.7
  },
  "gpt-oss-20b-GGUF": {
  "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
  "recipe": "llamacpp",
  "suggested": false,
- "labels": ["reasoning", "tool-calling"]
+ "labels": ["reasoning", "tool-calling"],
+ "size": 11.6
  },
  "gpt-oss-120b-mxfp-GGUF": {
  "checkpoint": "ggml-org/gpt-oss-120b-GGUF:*",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["hot", "reasoning", "tool-calling"]
+ "labels": ["hot", "reasoning", "tool-calling"],
+ "size": 63.3
  },
  "gpt-oss-20b-mxfp4-GGUF": {
  "checkpoint": "ggml-org/gpt-oss-20b-GGUF",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["hot", "reasoning", "tool-calling"]
+ "labels": ["hot", "reasoning", "tool-calling"],
+ "size": 19.48
  },
  "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
  "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
  "recipe": "llamacpp",
  "suggested": true,
- "labels": ["reasoning","hot"]
+ "labels": ["reasoning"],
+ "size": 73.1
+ },
+ "Gemma-3-4b-it-FLM": {
+ "checkpoint": "gemma3:4b",
+ "recipe": "flm",
+ "suggested": true,
+ "labels": ["hot","vision"],
+ "size": 5.26
+ },
+ "Qwen3-4B-Instruct-2507-FLM": {
+ "checkpoint": "qwen3-it:4b",
+ "recipe": "flm",
+ "suggested": true,
+ "labels": ["hot"],
+ "size": 3.07
+ },
+ "Qwen3-8b-FLM": {
+ "checkpoint": "qwen3:8b",
+ "recipe": "flm",
+ "suggested": true,
+ "labels": ["reasoning"],
+ "size": 5.57
+ },
+ "Llama-3.2-1B-FLM": {
+ "checkpoint": "llama3.2:1b",
+ "recipe": "flm",
+ "suggested": true,
+ "size": 1.21
+ },
+ "Llama-3.2-3B-FLM": {
+ "checkpoint": "llama3.2:3b",
+ "recipe": "flm",
+ "suggested": true,
+ "size": 2.62
+ },
+ "Llama-3.1-8B-FLM": {
+ "checkpoint": "llama3.1:8b",
+ "recipe": "flm",
+ "suggested": true,
+ "size": 5.36
  }
  }
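
The bulk of the server_models.json change is mechanical: every entry gains a "size" field, a few "labels" lists are adjusted, and six new entries with "recipe": "flm" are added. As a rough illustration only (not code from the package), a consumer of this registry could read the file and use the new field as in the sketch below; the path argument is hypothetical and treating "size" as an approximate download size in GB is an assumption the diff itself does not state.

import json

def suggested_models(registry_path, max_size=None):
    """List suggested models from a server_models.json file.

    Assumption: 'size' is an approximate download size in GB; the diff
    does not document the unit.
    """
    with open(registry_path, "r", encoding="utf-8") as f:
        registry = json.load(f)

    rows = []
    for name, entry in registry.items():
        if not entry.get("suggested", False):
            continue
        size = entry.get("size")
        if max_size is not None and size is not None and size > max_size:
            continue
        rows.append((name, entry.get("recipe"), size, entry.get("labels", [])))
    return rows

if __name__ == "__main__":
    # Hypothetical path; in an installed wheel the file ships under lemonade_server/.
    for name, recipe, size, labels in suggested_models("server_models.json", max_size=10):
        print(f"{name}: recipe={recipe}, size={size}, labels={labels}")
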
lemonade_server/settings.py
@@ -1,39 +1,39 @@
- import json
- import os
- from lemonade.cache import DEFAULT_CACHE_DIR
-
- # Define the path for the user settings file, placing it in the cache directory
- USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
-
-
- def save_setting(key, value):
-     """Save a setting to the user_settings.json file."""
-     # Ensure the cache directory exists
-     os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
-
-     settings = {}
-     if os.path.exists(USER_SETTINGS_FILE):
-         with open(USER_SETTINGS_FILE, "r") as f:
-             try:
-                 settings = json.load(f)
-             except json.JSONDecodeError:
-                 # If the file is empty or corrupt, start with a fresh dictionary
-                 pass
-
-     settings[key] = value
-     with open(USER_SETTINGS_FILE, "w") as f:
-         json.dump(settings, f, indent=4)
-
-
- def load_setting(key, default=None):
-     """Load a setting from the user_settings.json file."""
-     if not os.path.exists(USER_SETTINGS_FILE):
-         return default
-
-     with open(USER_SETTINGS_FILE, "r") as f:
-         try:
-             settings = json.load(f)
-             return settings.get(key, default)
-         except json.JSONDecodeError:
-             # Return default if the file is empty or corrupt
-             return default
+ import json
+ import os
+ from lemonade.cache import DEFAULT_CACHE_DIR
+
+ # Define the path for the user settings file, placing it in the cache directory
+ USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
+
+
+ def save_setting(key, value):
+     """Save a setting to the user_settings.json file."""
+     # Ensure the cache directory exists
+     os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
+
+     settings = {}
+     if os.path.exists(USER_SETTINGS_FILE):
+         with open(USER_SETTINGS_FILE, "r") as f:
+             try:
+                 settings = json.load(f)
+             except json.JSONDecodeError:
+                 # If the file is empty or corrupt, start with a fresh dictionary
+                 pass
+
+     settings[key] = value
+     with open(USER_SETTINGS_FILE, "w") as f:
+         json.dump(settings, f, indent=4)
+
+
+ def load_setting(key, default=None):
+     """Load a setting from the user_settings.json file."""
+     if not os.path.exists(USER_SETTINGS_FILE):
+         return default
+
+     with open(USER_SETTINGS_FILE, "r") as f:
+         try:
+             settings = json.load(f)
+             return settings.get(key, default)
+         except json.JSONDecodeError:
+             # Return default if the file is empty or corrupt
+             return default
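
The settings.py hunk removes and re-adds every line with identical content, so the change is almost certainly a whitespace or line-ending normalization rather than a behavioral one. For context, a hedged usage sketch of the two helpers the module defines; the import path lemonade_server.settings is inferred from the file list above, and the key and value are made up for illustration.

# Hypothetical usage of the helpers shown in the settings.py diff above.
from lemonade_server.settings import save_setting, load_setting  # import path inferred

# Persist a preference into <cache dir>/user_settings.json.
save_setting("example_setting", "example_value")  # key/value are illustrative only

# Read it back later; the default is returned if the file or key is missing or corrupt.
value = load_setting("example_setting", default="fallback")
print(value)
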