lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/tools/humaneval.py +1 -1
- lemonade/tools/mmlu.py +1 -1
- lemonade/tools/oga/load.py +1 -1
- lemonade/tools/perplexity.py +2 -2
- lemonade/tools/quark/quark_load.py +1 -1
- lemonade/tools/quark/quark_quantize.py +2 -2
- lemonade/tools/server/llamacpp.py +130 -9
- lemonade/tools/server/serve.py +73 -0
- lemonade/tools/server/static/styles.css +424 -4
- lemonade/tools/server/static/webapp.html +301 -35
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/METADATA +5 -12
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD +21 -21
- lemonade_server/model_manager.py +12 -2
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +46 -44
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/top_level.txt +0 -0
|
@@ -2,197 +2,177 @@
|
|
|
2
2
|
"Qwen2.5-0.5B-Instruct-CPU": {
|
|
3
3
|
"checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
|
|
4
4
|
"recipe": "oga-cpu",
|
|
5
|
-
"reasoning": false,
|
|
6
5
|
"suggested": true
|
|
7
6
|
},
|
|
8
7
|
"Llama-3.2-1B-Instruct-CPU": {
|
|
9
8
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
10
9
|
"recipe": "oga-cpu",
|
|
11
|
-
"reasoning": false,
|
|
12
10
|
"suggested": false
|
|
13
11
|
},
|
|
14
12
|
"Llama-3.2-3B-Instruct-CPU": {
|
|
15
13
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
16
14
|
"recipe": "oga-cpu",
|
|
17
|
-
"reasoning": false,
|
|
18
15
|
"suggested": false
|
|
19
16
|
},
|
|
20
17
|
"Phi-3-Mini-Instruct-CPU": {
|
|
21
18
|
"checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
|
|
22
19
|
"recipe": "oga-cpu",
|
|
23
|
-
"reasoning": false,
|
|
24
20
|
"suggested": true
|
|
25
21
|
},
|
|
26
22
|
"Qwen-1.5-7B-Chat-CPU": {
|
|
27
23
|
"checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
|
|
28
24
|
"recipe": "oga-cpu",
|
|
29
|
-
"reasoning": false,
|
|
30
25
|
"suggested": true
|
|
31
26
|
},
|
|
32
27
|
"DeepSeek-R1-Distill-Llama-8B-CPU": {
|
|
33
28
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
|
|
34
29
|
"recipe": "oga-cpu",
|
|
35
|
-
"reasoning": true,
|
|
36
|
-
"suggested": true
|
|
30
|
+
"suggested": true,
|
|
31
|
+
"labels": ["reasoning"]
|
|
37
32
|
},
|
|
38
33
|
"DeepSeek-R1-Distill-Qwen-7B-CPU": {
|
|
39
34
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
|
|
40
35
|
"recipe": "oga-cpu",
|
|
41
|
-
"reasoning": true,
|
|
42
|
-
"suggested": true
|
|
36
|
+
"suggested": true,
|
|
37
|
+
"labels": ["reasoning"]
|
|
43
38
|
},
|
|
44
39
|
"Llama-3.2-1B-Instruct-Hybrid": {
|
|
45
40
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
46
41
|
"recipe": "oga-hybrid",
|
|
47
|
-
"reasoning": false,
|
|
48
42
|
"max_prompt_length": 3000,
|
|
49
43
|
"suggested": true
|
|
50
44
|
},
|
|
51
45
|
"Llama-3.2-3B-Instruct-Hybrid": {
|
|
52
46
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
53
47
|
"recipe": "oga-hybrid",
|
|
54
|
-
"reasoning": false,
|
|
55
48
|
"max_prompt_length": 2000,
|
|
56
49
|
"suggested": true
|
|
57
50
|
},
|
|
58
51
|
"Phi-3-Mini-Instruct-Hybrid": {
|
|
59
52
|
"checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
60
53
|
"recipe": "oga-hybrid",
|
|
61
|
-
"reasoning": false,
|
|
62
54
|
"max_prompt_length": 2000,
|
|
63
55
|
"suggested": true
|
|
64
56
|
},
|
|
65
57
|
"Phi-3.5-Mini-Instruct-Hybrid": {
|
|
66
58
|
"checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
67
59
|
"recipe": "oga-hybrid",
|
|
68
|
-
"reasoning": false,
|
|
69
60
|
"suggested": false
|
|
70
61
|
},
|
|
71
62
|
"Qwen-1.5-7B-Chat-Hybrid": {
|
|
72
63
|
"checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
73
64
|
"recipe": "oga-hybrid",
|
|
74
|
-
"reasoning": false,
|
|
75
65
|
"max_prompt_length": 3000,
|
|
76
66
|
"suggested": true
|
|
77
67
|
},
|
|
78
68
|
"DeepSeek-R1-Distill-Llama-8B-Hybrid": {
|
|
79
69
|
"checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
80
70
|
"recipe": "oga-hybrid",
|
|
81
|
-
"reasoning": true,
|
|
82
71
|
"max_prompt_length": 2000,
|
|
83
|
-
"suggested": true
|
|
72
|
+
"suggested": true,
|
|
73
|
+
"labels": ["reasoning"]
|
|
84
74
|
},
|
|
85
75
|
"DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
|
|
86
76
|
"checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
87
77
|
"recipe": "oga-hybrid",
|
|
88
|
-
"reasoning": true,
|
|
89
78
|
"max_prompt_length": 2000,
|
|
90
|
-
"suggested": true
|
|
79
|
+
"suggested": true,
|
|
80
|
+
"labels": ["reasoning"]
|
|
91
81
|
},
|
|
92
82
|
"Mistral-7B-v0.3-Instruct-Hybrid": {
|
|
93
83
|
"checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
|
|
94
84
|
"recipe": "oga-hybrid",
|
|
95
|
-
"reasoning": false,
|
|
96
85
|
"max_prompt_length": 2000,
|
|
97
86
|
"suggested": true
|
|
98
87
|
},
|
|
99
88
|
"Llama-3.1-8B-Instruct-Hybrid": {
|
|
100
89
|
"checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
|
|
101
90
|
"recipe": "oga-hybrid",
|
|
102
|
-
"reasoning": false,
|
|
103
91
|
"max_prompt_length": 2000,
|
|
104
92
|
"suggested": true
|
|
105
93
|
},
|
|
106
94
|
"Llama-xLAM-2-8b-fc-r-Hybrid": {
|
|
107
95
|
"checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
|
|
108
96
|
"recipe": "oga-hybrid",
|
|
109
|
-
"reasoning": false,
|
|
110
97
|
"max_prompt_length": 2000,
|
|
111
98
|
"suggested": true
|
|
112
99
|
},
|
|
113
100
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
114
101
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
115
102
|
"recipe": "oga-igpu",
|
|
116
|
-
"reasoning": false,
|
|
117
103
|
"suggested": false
|
|
118
104
|
},
|
|
119
105
|
"Llama-3.2-3B-Instruct-DirectML": {
|
|
120
106
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
|
|
121
107
|
"recipe": "oga-igpu",
|
|
122
|
-
"reasoning": false,
|
|
123
108
|
"suggested": false
|
|
124
109
|
},
|
|
125
110
|
"Phi-3.5-Mini-Instruct-DirectML": {
|
|
126
111
|
"checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
|
|
127
112
|
"recipe": "oga-igpu",
|
|
128
|
-
"reasoning": false,
|
|
129
113
|
"suggested": false
|
|
130
114
|
},
|
|
131
115
|
"Qwen-1.5-7B-Chat-DirectML": {
|
|
132
116
|
"checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
|
|
133
117
|
"recipe": "oga-igpu",
|
|
134
|
-
"reasoning": false,
|
|
135
118
|
"suggested": false
|
|
136
119
|
},
|
|
137
120
|
"Mistral-7B-v0.1-Instruct-DirectML": {
|
|
138
121
|
"checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
|
|
139
122
|
"recipe": "oga-igpu",
|
|
140
|
-
"reasoning": false,
|
|
141
123
|
"suggested": false
|
|
142
124
|
},
|
|
143
125
|
"Llama-3-8B-Instruct-DirectML": {
|
|
144
126
|
"checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
|
|
145
127
|
"recipe": "oga-igpu",
|
|
146
|
-
"reasoning": false,
|
|
147
128
|
"suggested": false
|
|
148
129
|
},
|
|
149
130
|
"Qwen3-0.6B-GGUF": {
|
|
150
131
|
"checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
|
|
151
132
|
"recipe": "llamacpp",
|
|
152
|
-
"reasoning": true,
|
|
153
|
-
"suggested": true
|
|
133
|
+
"suggested": true,
|
|
134
|
+
"labels": ["reasoning"]
|
|
154
135
|
},
|
|
155
136
|
"Qwen3-1.7B-GGUF": {
|
|
156
137
|
"checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
|
|
157
138
|
"recipe": "llamacpp",
|
|
158
|
-
"reasoning": true,
|
|
159
|
-
"suggested": true
|
|
139
|
+
"suggested": true,
|
|
140
|
+
"labels": ["reasoning"]
|
|
160
141
|
},
|
|
161
142
|
"Qwen3-4B-GGUF": {
|
|
162
143
|
"checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
|
|
163
144
|
"recipe": "llamacpp",
|
|
164
|
-
"reasoning": true,
|
|
165
|
-
"suggested": true
|
|
145
|
+
"suggested": true,
|
|
146
|
+
"labels": ["reasoning"]
|
|
166
147
|
},
|
|
167
148
|
"Qwen3-8B-GGUF": {
|
|
168
149
|
"checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
|
|
169
150
|
"recipe": "llamacpp",
|
|
170
|
-
"reasoning": true,
|
|
171
|
-
"suggested": true
|
|
151
|
+
"suggested": true,
|
|
152
|
+
"labels": ["reasoning"]
|
|
172
153
|
},
|
|
173
154
|
"DeepSeek-Qwen3-8B-GGUF": {
|
|
174
155
|
"checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
|
|
175
156
|
"recipe": "llamacpp",
|
|
176
|
-
"reasoning": true,
|
|
177
|
-
"suggested": true
|
|
157
|
+
"suggested": true,
|
|
158
|
+
"labels": ["reasoning"]
|
|
178
159
|
},
|
|
179
160
|
"Qwen3-14B-GGUF": {
|
|
180
161
|
"checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
|
|
181
162
|
"recipe": "llamacpp",
|
|
182
|
-
"reasoning": true,
|
|
183
|
-
"suggested": true
|
|
163
|
+
"suggested": true,
|
|
164
|
+
"labels": ["reasoning"]
|
|
184
165
|
},
|
|
185
166
|
"Qwen3-30B-A3B-GGUF": {
|
|
186
167
|
"checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
|
|
187
168
|
"recipe": "llamacpp",
|
|
188
|
-
"reasoning": true,
|
|
189
|
-
"suggested": true
|
|
169
|
+
"suggested": true,
|
|
170
|
+
"labels": ["reasoning"]
|
|
190
171
|
},
|
|
191
172
|
"Gemma-3-4b-it-GGUF": {
|
|
192
173
|
"checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
|
193
174
|
"mmproj": "mmproj-model-f16.gguf",
|
|
194
175
|
"recipe": "llamacpp",
|
|
195
|
-
"reasoning": false,
|
|
196
176
|
"suggested": true,
|
|
197
177
|
"labels": ["vision"]
|
|
198
178
|
},
|
|
@@ -200,7 +180,6 @@
|
|
|
200
180
|
"checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
|
|
201
181
|
"mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
|
|
202
182
|
"recipe": "llamacpp",
|
|
203
|
-
"reasoning": false,
|
|
204
183
|
"suggested": true,
|
|
205
184
|
"labels": ["vision"]
|
|
206
185
|
},
|
|
@@ -208,8 +187,31 @@
|
|
|
208
187
|
"checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
|
|
209
188
|
"mmproj": "mmproj-F16.gguf",
|
|
210
189
|
"recipe": "llamacpp",
|
|
211
|
-
"reasoning": false,
|
|
212
190
|
"suggested": true,
|
|
213
191
|
"labels": ["vision"]
|
|
192
|
+
},
|
|
193
|
+
"nomic-embed-text-v1-GGUF": {
|
|
194
|
+
"checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
|
|
195
|
+
"recipe": "llamacpp",
|
|
196
|
+
"suggested": true,
|
|
197
|
+
"labels": ["embeddings"]
|
|
198
|
+
},
|
|
199
|
+
"nomic-embed-text-v2-moe-GGUF": {
|
|
200
|
+
"checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
|
|
201
|
+
"recipe": "llamacpp",
|
|
202
|
+
"suggested": true,
|
|
203
|
+
"labels": ["embeddings"]
|
|
204
|
+
},
|
|
205
|
+
"bge-reranker-v2-m3-GGUF": {
|
|
206
|
+
"checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
|
|
207
|
+
"recipe": "llamacpp",
|
|
208
|
+
"suggested": true,
|
|
209
|
+
"labels": ["reranking"]
|
|
210
|
+
},
|
|
211
|
+
"jina-reranker-v1-tiny-en-GGUF": {
|
|
212
|
+
"checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
|
|
213
|
+
"recipe": "llamacpp",
|
|
214
|
+
"suggested": false,
|
|
215
|
+
"labels": ["reranking"]
|
|
214
216
|
}
|
|
215
217
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|