lemonade-sdk 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
@@ -0,0 +1,643 @@
1
+ {
2
+ "Qwen2.5-0.5B-Instruct-CPU": {
3
+ "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
4
+ "recipe": "oga-cpu",
5
+ "suggested": true,
6
+ "size": 0.77
7
+ },
8
+ "Llama-3.2-1B-Instruct-CPU": {
9
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
10
+ "recipe": "oga-cpu",
11
+ "suggested": false,
12
+ "size": 1.64
13
+ },
14
+ "Llama-3.2-3B-Instruct-CPU": {
15
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
16
+ "recipe": "oga-cpu",
17
+ "suggested": false,
18
+ "size": 3.15
19
+ },
20
+ "Phi-3-Mini-Instruct-CPU": {
21
+ "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
22
+ "recipe": "oga-cpu",
23
+ "suggested": true,
24
+ "size": 2.23
25
+ },
26
+ "Qwen-1.5-7B-Chat-CPU": {
27
+ "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
28
+ "recipe": "oga-cpu",
29
+ "suggested": true,
30
+ "size": 5.89
31
+ },
32
+ "DeepSeek-R1-Distill-Llama-8B-CPU": {
33
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
34
+ "recipe": "oga-cpu",
35
+ "suggested": true,
36
+ "labels": ["reasoning"],
37
+ "size": 5.78
38
+ },
39
+ "DeepSeek-R1-Distill-Qwen-7B-CPU": {
40
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
41
+ "recipe": "oga-cpu",
42
+ "suggested": true,
43
+ "labels": ["reasoning"],
44
+ "size": 5.78
45
+ },
46
+ "Llama-3.2-1B-Instruct-Hybrid": {
47
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-hybrid",
48
+ "recipe": "oga-hybrid",
49
+ "suggested": true,
50
+ "size": 1.89
51
+ },
52
+ "Llama-3.2-3B-Instruct-Hybrid": {
53
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-onnx-ryzenai-hybrid",
54
+ "recipe": "oga-hybrid",
55
+ "suggested": true,
56
+ "size": 4.28
57
+ },
58
+ "Phi-3-Mini-Instruct-Hybrid": {
59
+ "checkpoint": "amd/Phi-3-mini-4k-instruct-onnx-ryzenai-hybrid",
60
+ "recipe": "oga-hybrid",
61
+ "suggested": true,
62
+ "size": 4.18
63
+ },
64
+ "Phi-3.5-Mini-Instruct-Hybrid": {
65
+ "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-hybrid",
66
+ "recipe": "oga-hybrid",
67
+ "suggested": false,
68
+ "size": 4.21
69
+ },
70
+ "Qwen-1.5-7B-Chat-Hybrid": {
71
+ "checkpoint": "amd/Qwen1.5-7B-Chat-onnx-ryzenai-hybrid",
72
+ "recipe": "oga-hybrid",
73
+ "suggested": true,
74
+ "size": 8.83
75
+ },
76
+ "Qwen-2.5-7B-Instruct-Hybrid": {
77
+ "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-hybrid",
78
+ "recipe": "oga-hybrid",
79
+ "suggested": true,
80
+ "size": 8.65
81
+ },
82
+ "Qwen-2.5-3B-Instruct-Hybrid": {
83
+ "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-hybrid",
84
+ "recipe": "oga-hybrid",
85
+ "suggested": true,
86
+ "size": 3.97
87
+ },
88
+ "Qwen-2.5-1.5B-Instruct-Hybrid": {
89
+ "checkpoint": "amd/Qwen2.5-1.5B-Instruct-onnx-ryzenai-hybrid",
90
+ "recipe": "oga-hybrid",
91
+ "suggested": true,
92
+ "size": 2.16
93
+ },
94
+ "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
95
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-hybrid",
96
+ "recipe": "oga-hybrid",
97
+ "suggested": true,
98
+ "labels": ["reasoning"],
99
+ "size": 9.09
100
+ },
101
+ "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
102
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-hybrid",
103
+ "recipe": "oga-hybrid",
104
+ "max_prompt_length": 2000,
105
+ "suggested": false,
106
+ "labels": ["reasoning"],
107
+ "size": 8.67
108
+ },
109
+ "Mistral-7B-v0.3-Instruct-Hybrid": {
110
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-hybrid",
111
+ "recipe": "oga-hybrid",
112
+ "suggested": true,
113
+ "size": 7.85
114
+ },
115
+ "Llama-3.1-8B-Instruct-Hybrid": {
116
+ "checkpoint": "amd/Meta-Llama-3.1-8B-Instruct-onnx-ryzenai-hybrid",
117
+ "recipe": "oga-hybrid",
118
+ "suggested": true,
119
+ "size": 9.09
120
+ },
121
+ "Qwen3-1.7B-Hybrid": {
122
+ "checkpoint": "amd/Qwen3-1.7B-awq-quant-onnx-hybrid",
123
+ "recipe": "oga-hybrid",
124
+ "suggested": true,
125
+ "labels": ["reasoning"],
126
+ "size": 2.55
127
+ },
128
+ "Phi-4-Mini-Instruct-Hybrid": {
129
+ "checkpoint": "amd/Phi-4-mini-instruct-onnx-ryzenai-hybrid",
130
+ "recipe": "oga-hybrid",
131
+ "suggested": true,
132
+ "size": 5.46
133
+ },
134
+ "Qwen3-4B-Hybrid": {
135
+ "checkpoint": "amd/Qwen3-4B-awq-quant-onnx-hybrid",
136
+ "recipe": "oga-hybrid",
137
+ "suggested": true,
138
+ "labels": ["reasoning"],
139
+ "size": 5.17
140
+ },
141
+ "Qwen3-8B-Hybrid": {
142
+ "checkpoint": "amd/Qwen3-8B-awq-quant-onnx-hybrid",
143
+ "recipe": "oga-hybrid",
144
+ "suggested": true,
145
+ "labels": ["reasoning"],
146
+ "size": 9.42
147
+ },
148
+ "Qwen-2.5-7B-Instruct-NPU": {
149
+ "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-npu",
150
+ "recipe": "oga-npu",
151
+ "suggested": true,
152
+ "size": 8.82
153
+ },
154
+ "Qwen-2.5-3B-Instruct-NPU": {
155
+ "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-npu",
156
+ "recipe": "oga-npu",
157
+ "suggested": true,
158
+ "size": 4.09
159
+ },
160
+ "DeepSeek-R1-Distill-Llama-8B-NPU": {
161
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-npu",
162
+ "recipe": "oga-npu",
163
+ "suggested": true,
164
+ "size": 9.30
165
+ },
166
+ "DeepSeek-R1-Distill-Qwen-7B-NPU": {
167
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-npu",
168
+ "recipe": "oga-npu",
169
+ "suggested": false,
170
+ "size": 8.87
171
+ },
172
+ "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
173
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-onnx-ryzenai-npu",
174
+ "recipe": "oga-npu",
175
+ "suggested": false,
176
+ "size": 2.30
177
+ },
178
+ "Llama-3.2-1B-Instruct-NPU": {
179
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-npu",
180
+ "recipe": "oga-npu",
181
+ "suggested": false,
182
+ "size": 1.96
183
+ },
184
+ "Mistral-7B-v0.3-Instruct-NPU": {
185
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-npu",
186
+ "recipe": "oga-npu",
187
+ "suggested": true,
188
+ "size": 8.09
189
+ },
190
+ "Phi-3.5-Mini-Instruct-NPU": {
191
+ "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-npu",
192
+ "recipe": "oga-npu",
193
+ "suggested": true,
194
+ "size": 4.35
195
+ },
196
+ "ChatGLM-3-6b-Instruct-NPU": {
197
+ "checkpoint": "amd/chatglm3-6b-onnx-ryzenai-npu",
198
+ "recipe": "oga-npu",
199
+ "suggested": false,
200
+ "size": 7.03
201
+ },
202
+ "Llama-3.2-1B-Instruct-DirectML": {
203
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
204
+ "recipe": "oga-igpu",
205
+ "suggested": false,
206
+ "size": 2.81
207
+ },
208
+ "Llama-3.2-3B-Instruct-DirectML": {
209
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
210
+ "recipe": "oga-igpu",
211
+ "suggested": false,
212
+ "size": 6.75
213
+ },
214
+ "Phi-3.5-Mini-Instruct-DirectML": {
215
+ "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
216
+ "recipe": "oga-igpu",
217
+ "suggested": false,
218
+ "size": 2.14
219
+ },
220
+ "Qwen-1.5-7B-Chat-DirectML": {
221
+ "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
222
+ "recipe": "oga-igpu",
223
+ "suggested": false,
224
+ "size": 3.73
225
+ },
226
+ "Mistral-7B-v0.1-Instruct-DirectML": {
227
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
228
+ "recipe": "oga-igpu",
229
+ "suggested": false,
230
+ "size": 3.67
231
+ },
232
+ "Llama-3-8B-Instruct-DirectML": {
233
+ "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
234
+ "recipe": "oga-igpu",
235
+ "suggested": false,
236
+ "size": 4.61
237
+ },
238
+ "Qwen3-0.6B-GGUF": {
239
+ "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
240
+ "recipe": "llamacpp",
241
+ "suggested": true,
242
+ "labels": ["reasoning"],
243
+ "size": 0.38
244
+ },
245
+ "Qwen3-1.7B-GGUF": {
246
+ "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
247
+ "recipe": "llamacpp",
248
+ "suggested": true,
249
+ "labels": ["reasoning"],
250
+ "size": 1.06
251
+ },
252
+ "Qwen3-4B-GGUF": {
253
+ "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
254
+ "recipe": "llamacpp",
255
+ "suggested": true,
256
+ "labels": ["reasoning"],
257
+ "size": 2.38
258
+ },
259
+ "Qwen3-8B-GGUF": {
260
+ "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
261
+ "recipe": "llamacpp",
262
+ "suggested": true,
263
+ "labels": ["reasoning"],
264
+ "size": 5.25
265
+ },
266
+ "DeepSeek-Qwen3-8B-GGUF": {
267
+ "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
268
+ "recipe": "llamacpp",
269
+ "suggested": true,
270
+ "labels": ["reasoning"],
271
+ "size": 5.25
272
+ },
273
+ "Qwen3-14B-GGUF": {
274
+ "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
275
+ "recipe": "llamacpp",
276
+ "suggested": true,
277
+ "labels": ["reasoning"],
278
+ "size": 8.54
279
+ },
280
+ "Qwen3-4B-Instruct-2507-GGUF": {
281
+ "checkpoint": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
282
+ "recipe": "llamacpp",
283
+ "suggested": true,
284
+ "labels": ["hot"],
285
+ "size": 2.5
286
+ },
287
+ "Qwen3-30B-A3B-GGUF": {
288
+ "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
289
+ "recipe": "llamacpp",
290
+ "suggested": true,
291
+ "labels": ["reasoning"],
292
+ "size": 17.4
293
+ },
294
+ "Qwen3-30B-A3B-Instruct-2507-GGUF": {
295
+ "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
296
+ "recipe": "llamacpp",
297
+ "suggested": true,
298
+ "size": 17.4
299
+ },
300
+ "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
301
+ "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
302
+ "recipe": "llamacpp",
303
+ "suggested": true,
304
+ "labels": ["coding","tool-calling","hot"],
305
+ "size": 18.6
306
+ },
307
+ "Gemma-3-4b-it-GGUF": {
308
+ "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
309
+ "mmproj": "mmproj-model-f16.gguf",
310
+ "recipe": "llamacpp",
311
+ "suggested": true,
312
+ "labels": ["hot","vision"],
313
+ "size": 3.61
314
+ },
315
+ "Phi-4-mini-instruct-GGUF": {
316
+ "checkpoint": "unsloth/Phi-4-mini-instruct-GGUF:Phi-4-mini-instruct-Q4_K_M.gguf",
317
+ "recipe": "llamacpp",
318
+ "suggested": true,
319
+ "size": 2.49
320
+ },
321
+ "LFM2-1.2B-GGUF": {
322
+ "checkpoint": "LiquidAI/LFM2-1.2B-GGUF:LFM2-1.2B-Q4_K_M.gguf",
323
+ "recipe": "llamacpp",
324
+ "suggested": true,
325
+ "size": 0.731
326
+ },
327
+ "Jan-nano-128k-GGUF": {
328
+ "checkpoint": "Menlo/Jan-nano-128k-gguf:jan-nano-128k-Q4_K_M.gguf",
329
+ "recipe": "llamacpp",
330
+ "suggested": true,
331
+ "size": 2.5
332
+ },
333
+ "Jan-v1-4B-GGUF": {
334
+ "checkpoint": "janhq/Jan-v1-4B-GGUF:Jan-v1-4B-Q4_K_M.gguf",
335
+ "recipe": "llamacpp",
336
+ "suggested": true,
337
+ "size": 2.5
338
+ },
339
+ "Llama-3.2-1B-Instruct-GGUF": {
340
+ "checkpoint": "unsloth/Llama-3.2-1B-Instruct-GGUF:Llama-3.2-1B-Instruct-UD-Q4_K_XL.gguf",
341
+ "recipe": "llamacpp",
342
+ "suggested": true,
343
+ "size": 0.834
344
+ },
345
+ "Llama-3.2-3B-Instruct-GGUF": {
346
+ "checkpoint": "unsloth/Llama-3.2-3B-Instruct-GGUF:Llama-3.2-3B-Instruct-UD-Q4_K_XL.gguf",
347
+ "recipe": "llamacpp",
348
+ "suggested": true,
349
+ "size": 2.06
350
+ },
351
+ "SmolLM3-3B-GGUF": {
352
+ "checkpoint": "unsloth/SmolLM3-3B-128K-GGUF:SmolLM3-3B-128K-UD-Q4_K_XL.gguf",
353
+ "recipe": "llamacpp",
354
+ "suggested": true,
355
+ "size": 1.94
356
+ },
357
+ "Ministral-3-3B-Instruct-2512-GGUF": {
358
+ "checkpoint": "mistralai/Ministral-3-3B-Instruct-2512-GGUF:Ministral-3-3B-Instruct-2512-Q4_K_M.gguf",
359
+ "mmproj": "Ministral-3-3B-Instruct-2512-BF16-mmproj.gguf",
360
+ "recipe": "llamacpp",
361
+ "suggested": true,
362
+ "labels": ["vision"],
363
+ "size": 2.85
364
+ },
365
+ "Qwen2.5-VL-7B-Instruct-GGUF": {
366
+ "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
367
+ "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
368
+ "recipe": "llamacpp",
369
+ "suggested": true,
370
+ "labels": ["vision"],
371
+ "size": 4.68
372
+ },
373
+ "Qwen3-VL-4B-Instruct-GGUF": {
374
+ "checkpoint": "Qwen/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M",
375
+ "mmproj": "mmproj-Qwen3VL-4B-Instruct-F16.gguf",
376
+ "recipe": "llamacpp",
377
+ "suggested": true,
378
+ "labels": ["vision"],
379
+ "size": 3.33
380
+ },
381
+ "Qwen3-VL-8B-Instruct-GGUF": {
382
+ "checkpoint": "Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M",
383
+ "mmproj": "mmproj-Qwen3VL-8B-Instruct-F16.gguf",
384
+ "recipe": "llamacpp",
385
+ "suggested": true,
386
+ "labels": ["vision"],
387
+ "size": 6.19
388
+ },
389
+ "Qwen3-Next-80B-A3B-Instruct-GGUF": {
390
+ "checkpoint": "unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf",
391
+ "recipe": "llamacpp",
392
+ "suggested": true,
393
+ "labels": ["hot"],
394
+ "size": 45.1
395
+ },
396
+ "Llama-4-Scout-17B-16E-Instruct-GGUF": {
397
+ "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
398
+ "mmproj": "mmproj-F16.gguf",
399
+ "recipe": "llamacpp",
400
+ "suggested": true,
401
+ "labels": ["vision"],
402
+ "size": 61.5
403
+ },
404
+ "Cogito-v2-llama-109B-MoE-GGUF": {
405
+ "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
406
+ "mmproj": "mmproj-F16.gguf",
407
+ "recipe": "llamacpp",
408
+ "suggested": false,
409
+ "labels": ["vision"],
410
+ "size": 65.3
411
+ },
412
+ "nomic-embed-text-v1-GGUF": {
413
+ "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
414
+ "recipe": "llamacpp",
415
+ "suggested": true,
416
+ "labels": ["embeddings"],
417
+ "size": 0.0781
418
+ },
419
+ "nomic-embed-text-v2-moe-GGUF": {
420
+ "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
421
+ "recipe": "llamacpp",
422
+ "suggested": true,
423
+ "labels": ["embeddings"],
424
+ "size": 0.51
425
+ },
426
+ "Qwen3-Embedding-0.6B-GGUF": {
427
+ "checkpoint": "Qwen/Qwen3-Embedding-0.6B-GGUF:Qwen3-Embedding-0.6B-Q8_0.gguf",
428
+ "recipe": "llamacpp",
429
+ "suggested": true,
430
+ "labels": ["embeddings"],
431
+ "size": 0.64
432
+ },
433
+ "Qwen3-Embedding-4B-GGUF": {
434
+ "checkpoint": "Qwen/Qwen3-Embedding-4B-GGUF:Qwen3-Embedding-4B-Q8_0.gguf",
435
+ "recipe": "llamacpp",
436
+ "suggested": true,
437
+ "labels": ["embeddings"],
438
+ "size": 4.28
439
+ },
440
+ "Qwen3-Embedding-8B-GGUF": {
441
+ "checkpoint": "Qwen/Qwen3-Embedding-8B-GGUF:Qwen3-Embedding-8B-Q8_0.gguf",
442
+ "recipe": "llamacpp",
443
+ "suggested": true,
444
+ "labels": ["embeddings"],
445
+ "size": 8.05
446
+ },
447
+ "bge-reranker-v2-m3-GGUF": {
448
+ "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
449
+ "recipe": "llamacpp",
450
+ "suggested": true,
451
+ "labels": ["reranking"],
452
+ "size": 0.53
453
+ },
454
+ "jina-reranker-v1-tiny-en-GGUF": {
455
+ "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
456
+ "recipe": "llamacpp",
457
+ "suggested": false,
458
+ "labels": ["reranking"],
459
+ "size": 0.03
460
+ },
461
+ "Devstral-Small-2507-GGUF": {
462
+ "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
463
+ "recipe": "llamacpp",
464
+ "suggested": true,
465
+ "labels": ["coding","tool-calling"],
466
+ "size": 14.3
467
+ },
468
+ "Qwen2.5-Coder-32B-Instruct-GGUF": {
469
+ "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
470
+ "recipe": "llamacpp",
471
+ "suggested": true,
472
+ "labels": ["coding"],
473
+ "size": 19.85
474
+ },
475
+ "gpt-oss-120b-GGUF": {
476
+ "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
477
+ "recipe": "llamacpp",
478
+ "suggested": false,
479
+ "labels": ["reasoning", "tool-calling"],
480
+ "size": 62.7
481
+ },
482
+ "gpt-oss-20b-GGUF": {
483
+ "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
484
+ "recipe": "llamacpp",
485
+ "suggested": false,
486
+ "labels": ["reasoning", "tool-calling"],
487
+ "size": 11.6
488
+ },
489
+ "gpt-oss-120b-mxfp-GGUF": {
490
+ "checkpoint": "ggml-org/gpt-oss-120b-GGUF:*",
491
+ "recipe": "llamacpp",
492
+ "suggested": true,
493
+ "labels": ["hot", "reasoning", "tool-calling"],
494
+ "size": 63.3
495
+ },
496
+ "gpt-oss-20b-mxfp4-GGUF": {
497
+ "checkpoint": "ggml-org/gpt-oss-20b-GGUF",
498
+ "recipe": "llamacpp",
499
+ "suggested": true,
500
+ "labels": ["hot", "reasoning", "tool-calling"],
501
+ "size": 12.1
502
+ },
503
+ "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
504
+ "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
505
+ "recipe": "llamacpp",
506
+ "suggested": true,
507
+ "labels": ["reasoning"],
508
+ "size": 73.1
509
+ },
510
+ "Playable1-GGUF": {
511
+ "checkpoint": "playable/Playable1-GGUF:Playable1-q4_k_m.gguf",
512
+ "recipe": "llamacpp",
513
+ "suggested": false,
514
+ "labels": ["coding"],
515
+ "size": 4.68
516
+ },
517
+ "granite-4.0-h-tiny-GGUF": {
518
+ "checkpoint": "unsloth/granite-4.0-h-tiny-GGUF:Q4_K_M",
519
+ "recipe": "llamacpp",
520
+ "suggested": true,
521
+ "labels": ["tool-calling"],
522
+ "size": 4.25
523
+ },
524
+ "LFM2-8B-A1B-GGUF": {
525
+ "checkpoint": "LiquidAI/LFM2-8B-A1B-GGUF:Q4_K_M",
526
+ "recipe": "llamacpp",
527
+ "suggested": true,
528
+ "size": 4.8
529
+ },
530
+ "gpt-oss-20b-FLM": {
531
+ "checkpoint": "gpt-oss:20b",
532
+ "recipe": "flm",
533
+ "suggested": true,
534
+ "labels": ["reasoning"],
535
+ "size": 13.4
536
+ },
537
+ "Gemma3-1b-it-FLM": {
538
+ "checkpoint": "gemma3:1b",
539
+ "recipe": "flm",
540
+ "suggested": true,
541
+ "size": 1.17
542
+ },
543
+ "Gemma3-4b-it-FLM": {
544
+ "checkpoint": "gemma3:4b",
545
+ "recipe": "flm",
546
+ "suggested": true,
547
+ "labels": ["hot","vision"],
548
+ "size": 5.26
549
+ },
550
+ "Qwen3-4B-VL-FLM": {
551
+ "checkpoint": "qwen3vl-it:4b",
552
+ "recipe": "flm",
553
+ "suggested": true,
554
+ "labels": ["hot","vision"],
555
+ "size": 3.85
556
+ },
557
+ "Qwen3-0.6b-FLM": {
558
+ "checkpoint": "qwen3:0.6b",
559
+ "recipe": "flm",
560
+ "suggested": true,
561
+ "labels": ["reasoning"],
562
+ "size": 0.66
563
+ },
564
+ "Qwen3-4B-Instruct-2507-FLM": {
565
+ "checkpoint": "qwen3-it:4b",
566
+ "recipe": "flm",
567
+ "suggested": true,
568
+ "size": 3.07
569
+ },
570
+ "Qwen3-8b-FLM": {
571
+ "checkpoint": "qwen3:8b",
572
+ "recipe": "flm",
573
+ "suggested": true,
574
+ "labels": ["reasoning"],
575
+ "size": 5.57
576
+ },
577
+ "Llama-3.1-8B-FLM": {
578
+ "checkpoint": "llama3.1:8b",
579
+ "recipe": "flm",
580
+ "suggested": true,
581
+ "size": 5.36
582
+ },
583
+ "Llama-3.2-1B-FLM": {
584
+ "checkpoint": "llama3.2:1b",
585
+ "recipe": "flm",
586
+ "suggested": true,
587
+ "size": 1.21
588
+ },
589
+ "Llama-3.2-3B-FLM": {
590
+ "checkpoint": "llama3.2:3b",
591
+ "recipe": "flm",
592
+ "suggested": true,
593
+ "size": 2.62
594
+ },
595
+ "LFM2-1.2B-FLM": {
596
+ "checkpoint": "lfm2:1.2b",
597
+ "recipe": "flm",
598
+ "suggested": true,
599
+ "size": 0.96
600
+ },
601
+ "Whisper-Tiny": {
602
+ "checkpoint": "ggerganov/whisper.cpp:ggml-tiny.bin",
603
+ "recipe": "whispercpp",
604
+ "suggested": true,
605
+ "labels": ["audio", "transcription"],
606
+ "size": 0.075
607
+ },
608
+ "Whisper-Base": {
609
+ "checkpoint": "ggerganov/whisper.cpp:ggml-base.bin",
610
+ "recipe": "whispercpp",
611
+ "suggested": true,
612
+ "labels": ["audio", "transcription"],
613
+ "size": 0.142
614
+ },
615
+ "Whisper-Small": {
616
+ "checkpoint": "ggerganov/whisper.cpp:ggml-small.bin",
617
+ "recipe": "whispercpp",
618
+ "suggested": true,
619
+ "labels": ["audio", "transcription"],
620
+ "size": 0.466
621
+ },
622
+ "Whisper-Medium": {
623
+ "checkpoint": "ggerganov/whisper.cpp:ggml-medium.bin",
624
+ "recipe": "whispercpp",
625
+ "suggested": true,
626
+ "labels": ["audio", "transcription"],
627
+ "size": 1.42
628
+ },
629
+ "Whisper-Large-v3": {
630
+ "checkpoint": "ggerganov/whisper.cpp:ggml-large-v3.bin",
631
+ "recipe": "whispercpp",
632
+ "suggested": true,
633
+ "labels": ["audio", "transcription"],
634
+ "size": 2.87
635
+ },
636
+ "Whisper-Large-v3-Turbo": {
637
+ "checkpoint": "ggerganov/whisper.cpp:ggml-large-v3-turbo.bin",
638
+ "recipe": "whispercpp",
639
+ "suggested": true,
640
+ "labels": ["audio", "transcription", "hot"],
641
+ "size": 1.55
642
+ }
643
+ }
@@ -0,0 +1,39 @@
1
+ import json
2
+ import os
3
+ from lemonade.cache import DEFAULT_CACHE_DIR
4
+
5
+ # Define the path for the user settings file, placing it in the cache directory
6
+ USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
7
+
8
+
9
+ def save_setting(key, value):
10
+ """Save a setting to the user_settings.json file."""
11
+ # Ensure the cache directory exists
12
+ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
13
+
14
+ settings = {}
15
+ if os.path.exists(USER_SETTINGS_FILE):
16
+ with open(USER_SETTINGS_FILE, "r") as f:
17
+ try:
18
+ settings = json.load(f)
19
+ except json.JSONDecodeError:
20
+ # If the file is empty or corrupt, start with a fresh dictionary
21
+ pass
22
+
23
+ settings[key] = value
24
+ with open(USER_SETTINGS_FILE, "w") as f:
25
+ json.dump(settings, f, indent=4)
26
+
27
+
28
+ def load_setting(key, default=None):
29
+ """Load a setting from the user_settings.json file."""
30
+ if not os.path.exists(USER_SETTINGS_FILE):
31
+ return default
32
+
33
+ with open(USER_SETTINGS_FILE, "r") as f:
34
+ try:
35
+ settings = json.load(f)
36
+ return settings.get(key, default)
37
+ except json.JSONDecodeError:
38
+ # Return default if the file is empty or corrupt
39
+ return default