lemonade-sdk 7.0.0__py3-none-any.whl → 7.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cli.py +2 -0
- lemonade/tools/accuracy.py +335 -0
- lemonade/tools/server/instructions.py +294 -0
- lemonade/tools/server/llamacpp.py +315 -0
- lemonade/tools/server/port_utils.py +57 -0
- lemonade/tools/server/pydantic_models.py +83 -0
- lemonade/tools/server/serve.py +225 -167
- lemonade/tools/server/static/styles.css +313 -0
- lemonade/tools/server/thread_utils.py +87 -0
- lemonade/tools/server/tool_calls.py +50 -43
- lemonade/version.py +1 -1
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/METADATA +4 -7
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/RECORD +21 -14
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/WHEEL +1 -1
- lemonade_server/cli.py +4 -2
- lemonade_server/model_manager.py +34 -17
- lemonade_server/server_models.json +52 -3
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-7.0.0.dist-info → lemonade_sdk-7.0.2.dist-info}/top_level.txt +0 -0
lemonade_server/model_manager.py
CHANGED
|
@@ -50,7 +50,7 @@ class ModelManager:
|
|
|
50
50
|
downloaded_models = {}
|
|
51
51
|
for model in self.supported_models:
|
|
52
52
|
if (
|
|
53
|
-
self.supported_models[model]["checkpoint"]
|
|
53
|
+
self.supported_models[model]["checkpoint"].split(":")[0]
|
|
54
54
|
in self.downloaded_hf_checkpoints
|
|
55
55
|
):
|
|
56
56
|
downloaded_models[model] = self.supported_models[model]
|
|
@@ -62,22 +62,17 @@ class ModelManager:
|
|
|
62
62
|
Returns a dictionary of locally available models that are enabled by
|
|
63
63
|
the current installation.
|
|
64
64
|
"""
|
|
65
|
-
|
|
66
|
-
"onnxruntime-vitisai" in pkg_resources.working_set.by_key
|
|
67
|
-
and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
|
|
68
|
-
)
|
|
65
|
+
return self.filter_models_by_backend(self.downloaded_models)
|
|
69
66
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
return downloaded_models_enabled
|
|
67
|
+
def download_gguf(self, checkpoint) -> str:
|
|
68
|
+
# The colon after the checkpoint name indicates which
|
|
69
|
+
# specific GGUF to download
|
|
70
|
+
repo_id = checkpoint.split(":")[0]
|
|
71
|
+
pattern_to_match = f'*{checkpoint.split(":")[1]}.gguf'
|
|
72
|
+
return huggingface_hub.snapshot_download(
|
|
73
|
+
repo_id=repo_id,
|
|
74
|
+
allow_patterns=[pattern_to_match],
|
|
75
|
+
)
|
|
81
76
|
|
|
82
77
|
def download_models(self, models: list[str]):
|
|
83
78
|
"""
|
|
@@ -91,7 +86,29 @@ class ModelManager:
|
|
|
91
86
|
)
|
|
92
87
|
checkpoint = self.supported_models[model]["checkpoint"]
|
|
93
88
|
print(f"Downloading {model} ({checkpoint})")
|
|
94
|
-
|
|
89
|
+
|
|
90
|
+
if "gguf" in checkpoint.lower():
|
|
91
|
+
self.download_gguf(checkpoint)
|
|
92
|
+
else:
|
|
93
|
+
huggingface_hub.snapshot_download(repo_id=checkpoint)
|
|
94
|
+
|
|
95
|
+
def filter_models_by_backend(self, models: dict) -> dict:
|
|
96
|
+
"""
|
|
97
|
+
Returns a filtered dict of models that are enabled by the
|
|
98
|
+
current environment.
|
|
99
|
+
"""
|
|
100
|
+
hybrid_installed = (
|
|
101
|
+
"onnxruntime-vitisai" in pkg_resources.working_set.by_key
|
|
102
|
+
and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
|
|
103
|
+
)
|
|
104
|
+
filtered = {}
|
|
105
|
+
for model, value in models.items():
|
|
106
|
+
if value.get("recipe") == "oga-hybrid":
|
|
107
|
+
if hybrid_installed:
|
|
108
|
+
filtered[model] = value
|
|
109
|
+
else:
|
|
110
|
+
filtered[model] = value
|
|
111
|
+
return filtered
|
|
95
112
|
|
|
96
113
|
|
|
97
114
|
# This file was originally licensed under Apache 2.0. It has been modified.
|
|
@@ -9,13 +9,13 @@
|
|
|
9
9
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
10
10
|
"recipe": "oga-cpu",
|
|
11
11
|
"reasoning": false,
|
|
12
|
-
"suggested":
|
|
12
|
+
"suggested": false
|
|
13
13
|
},
|
|
14
14
|
"Llama-3.2-3B-Instruct-CPU": {
|
|
15
15
|
"checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
|
|
16
16
|
"recipe": "oga-cpu",
|
|
17
17
|
"reasoning": false,
|
|
18
|
-
"suggested":
|
|
18
|
+
"suggested": false
|
|
19
19
|
},
|
|
20
20
|
"Phi-3-Mini-Instruct-CPU": {
|
|
21
21
|
"checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
|
|
@@ -103,6 +103,13 @@
|
|
|
103
103
|
"max_prompt_length": 2000,
|
|
104
104
|
"suggested": true
|
|
105
105
|
},
|
|
106
|
+
"Llama-xLAM-2-8b-fc-r-Hybrid": {
|
|
107
|
+
"checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
|
|
108
|
+
"recipe": "oga-hybrid",
|
|
109
|
+
"reasoning": false,
|
|
110
|
+
"max_prompt_length": 2000,
|
|
111
|
+
"suggested": true
|
|
112
|
+
},
|
|
106
113
|
"Llama-3.2-1B-Instruct-DirectML": {
|
|
107
114
|
"checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
|
|
108
115
|
"recipe": "oga-igpu",
|
|
@@ -138,5 +145,47 @@
|
|
|
138
145
|
"recipe": "oga-igpu",
|
|
139
146
|
"reasoning": false,
|
|
140
147
|
"suggested": false
|
|
148
|
+
},
|
|
149
|
+
"Qwen3-0.6B-GGUF": {
|
|
150
|
+
"checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
|
|
151
|
+
"recipe": "llamacpp",
|
|
152
|
+
"reasoning": true,
|
|
153
|
+
"suggested": true
|
|
154
|
+
},
|
|
155
|
+
"Qwen3-1.7B-GGUF": {
|
|
156
|
+
"checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
|
|
157
|
+
"recipe": "llamacpp",
|
|
158
|
+
"reasoning": true,
|
|
159
|
+
"suggested": true
|
|
160
|
+
},
|
|
161
|
+
"Qwen3-4B-GGUF": {
|
|
162
|
+
"checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
|
|
163
|
+
"recipe": "llamacpp",
|
|
164
|
+
"reasoning": true,
|
|
165
|
+
"suggested": true
|
|
166
|
+
},
|
|
167
|
+
"Qwen3-8B-GGUF": {
|
|
168
|
+
"checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
|
|
169
|
+
"recipe": "llamacpp",
|
|
170
|
+
"reasoning": true,
|
|
171
|
+
"suggested": true
|
|
172
|
+
},
|
|
173
|
+
"DeepSeek-Qwen3-8B-GGUF": {
|
|
174
|
+
"checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
|
|
175
|
+
"recipe": "llamacpp",
|
|
176
|
+
"reasoning": true,
|
|
177
|
+
"suggested": true
|
|
178
|
+
},
|
|
179
|
+
"Qwen3-14B-GGUF": {
|
|
180
|
+
"checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
|
|
181
|
+
"recipe": "llamacpp",
|
|
182
|
+
"reasoning": true,
|
|
183
|
+
"suggested": true
|
|
184
|
+
},
|
|
185
|
+
"Qwen3-30B-A3B-GGUF": {
|
|
186
|
+
"checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
|
|
187
|
+
"recipe": "llamacpp",
|
|
188
|
+
"reasoning": true,
|
|
189
|
+
"suggested": true
|
|
141
190
|
}
|
|
142
|
-
}
|
|
191
|
+
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|