lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/network.py +18 -1
- lemonade/tools/llamacpp/bench.py +3 -1
- lemonade/tools/llamacpp/utils.py +7 -7
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +19 -7
- lemonade/tools/server/serve.py +19 -28
- lemonade/tools/server/static/styles.css +5 -6
- lemonade/tools/server/static/webapp.html +3 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +65 -84
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +30 -19
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +21 -21
- lemonade_server/cli.py +1 -1
- lemonade_server/model_manager.py +4 -3
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +35 -11
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0
|
@@ -39,19 +39,16 @@
|
|
|
39
39
|
"Llama-3.2-1B-Instruct-Hybrid": {
|
|
40
40
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
41
41
|
"recipe": "oga-hybrid",
|
|
42
|
-
"max_prompt_length": 3000,
|
|
43
42
|
"suggested": true
|
|
44
43
|
},
|
|
45
44
|
"Llama-3.2-3B-Instruct-Hybrid": {
|
|
46
45
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
47
46
|
"recipe": "oga-hybrid",
|
|
48
|
-
"max_prompt_length": 2000,
|
|
49
47
|
"suggested": true
|
|
50
48
|
},
|
|
51
49
|
"Phi-3-Mini-Instruct-Hybrid": {
|
|
52
50
|
"checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
53
51
|
"recipe": "oga-hybrid",
|
|
54
|
-
"max_prompt_length": 2000,
|
|
55
52
|
"suggested": true
|
|
56
53
|
},
|
|
57
54
|
"Phi-3.5-Mini-Instruct-Hybrid": {
|
|
@@ -62,13 +59,26 @@
|
|
|
62
59
|
"Qwen-1.5-7B-Chat-Hybrid": {
|
|
63
60
|
"checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
64
61
|
"recipe": "oga-hybrid",
|
|
65
|
-
"
|
|
62
|
+
"suggested": true
|
|
63
|
+
},
|
|
64
|
+
"Qwen-2.5-7B-Instruct-Hybrid": {
|
|
65
|
+
"checkpoint": "amd/Qwen2.5-7B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
|
|
66
|
+
"recipe": "oga-hybrid",
|
|
67
|
+
"suggested": true
|
|
68
|
+
},
|
|
69
|
+
"Qwen-2.5-3B-Instruct-Hybrid": {
|
|
70
|
+
"checkpoint": "amd/Qwen2.5-3B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
|
|
71
|
+
"recipe": "oga-hybrid",
|
|
72
|
+
"suggested": true
|
|
73
|
+
},
|
|
74
|
+
"Qwen-2.5-1.5B-Instruct-Hybrid": {
|
|
75
|
+
"checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
|
|
76
|
+
"recipe": "oga-hybrid",
|
|
66
77
|
"suggested": true
|
|
67
78
|
},
|
|
68
79
|
"DeepSeek-R1-Distill-Llama-8B-Hybrid": {
|
|
69
80
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
70
81
|
"recipe": "oga-hybrid",
|
|
71
|
-
"max_prompt_length": 2000,
|
|
72
82
|
"suggested": true,
|
|
73
83
|
"labels": ["reasoning"]
|
|
74
84
|
},
|
|
@@ -76,25 +86,32 @@
|
|
|
76
86
|
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
77
87
|
"recipe": "oga-hybrid",
|
|
78
88
|
"max_prompt_length": 2000,
|
|
79
|
-
"suggested": true,
|
|
89
|
+
"suggested": false,
|
|
80
90
|
"labels": ["reasoning"]
|
|
81
91
|
},
|
|
82
92
|
"Mistral-7B-v0.3-Instruct-Hybrid": {
|
|
83
93
|
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
84
94
|
"recipe": "oga-hybrid",
|
|
85
|
-
"max_prompt_length": 2000,
|
|
86
95
|
"suggested": true
|
|
87
96
|
},
|
|
88
97
|
"Llama-3.1-8B-Instruct-Hybrid": {
|
|
89
98
|
"checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
90
99
|
"recipe": "oga-hybrid",
|
|
91
|
-
"max_prompt_length": 2000,
|
|
92
100
|
"suggested": true
|
|
93
101
|
},
|
|
94
102
|
"Llama-xLAM-2-8b-fc-r-Hybrid": {
|
|
95
103
|
"checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
|
|
96
104
|
"recipe": "oga-hybrid",
|
|
97
|
-
"
|
|
105
|
+
"suggested": true
|
|
106
|
+
},
|
|
107
|
+
"Qwen-2.5-7B-Instruct-NPU": {
|
|
108
|
+
"checkpoint": "amd/Qwen2.5-7B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
109
|
+
"recipe": "oga-npu",
|
|
110
|
+
"suggested": true
|
|
111
|
+
},
|
|
112
|
+
"Qwen-2.5-1.5B-Instruct-NPU": {
|
|
113
|
+
"checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
|
|
114
|
+
"recipe": "oga-npu",
|
|
98
115
|
"suggested": true
|
|
99
116
|
},
|
|
100
117
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
@@ -169,6 +186,12 @@
|
|
|
169
186
|
"suggested": true,
|
|
170
187
|
"labels": ["reasoning"]
|
|
171
188
|
},
|
|
189
|
+
"Qwen3-30B-A3B-Instruct-2507-GGUF": {
|
|
190
|
+
"checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
|
|
191
|
+
"recipe": "llamacpp",
|
|
192
|
+
"suggested": true,
|
|
193
|
+
"labels": ["coding"]
|
|
194
|
+
},
|
|
172
195
|
"Gemma-3-4b-it-GGUF": {
|
|
173
196
|
"checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
174
197
|
"mmproj": "mmproj-model-f16.gguf",
|
|
@@ -217,12 +240,13 @@
|
|
|
217
240
|
"Devstral-Small-2507-GGUF":{
|
|
218
241
|
"checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
|
|
219
242
|
"recipe": "llamacpp",
|
|
220
|
-
"suggested": true
|
|
243
|
+
"suggested": true,
|
|
244
|
+
"labels": ["coding"]
|
|
221
245
|
},
|
|
222
246
|
"Qwen2.5-Coder-32B-Instruct-GGUF": {
|
|
223
247
|
"checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
|
|
224
248
|
"recipe": "llamacpp",
|
|
225
249
|
"suggested": true,
|
|
226
|
-
"labels": ["reasoning"]
|
|
250
|
+
"labels": ["reasoning", "coding"]
|
|
227
251
|
}
|
|
228
252
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|