lemonade-sdk 7.0.1__py3-none-any.whl → 7.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/cli.py +2 -0
- lemonade/tools/accuracy.py +335 -0
- lemonade/tools/huggingface_load.py +6 -0
- lemonade/tools/ort_genai/oga.py +6 -4
- lemonade/tools/prompt.py +28 -1
- lemonade/tools/server/instructions.py +8 -265
- lemonade/tools/server/llamacpp.py +45 -19
- lemonade/tools/server/port_utils.py +57 -0
- lemonade/tools/server/serve.py +96 -44
- lemonade/tools/server/static/instructions.html +262 -0
- lemonade/tools/server/thread_utils.py +87 -0
- lemonade/version.py +1 -1
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/METADATA +1 -1
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/RECORD +22 -18
- lemonade_server/model_manager.py +45 -12
- {lemonade/tools/server → lemonade_server}/pydantic_models.py +2 -0
- lemonade_server/server_models.json +25 -4
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/WHEEL +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/top_level.txt +0 -0
lemonade_server/model_manager.py
CHANGED
@@ -1,7 +1,8 @@
 import json
 import os
 import huggingface_hub
-import
+from importlib.metadata import distributions
+from lemonade_server.pydantic_models import LoadConfig
 
 
 class ModelManager:
@@ -64,16 +65,45 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
-    def download_gguf(self,
-
-
-
-
-
-
-
+    def download_gguf(self, model_config: LoadConfig) -> dict:
+        """
+        Downloads the GGUF file for the given model configuration.
+        """
+
+        # The variant parameter can be either:
+        # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
+        # 2. A quantization variant (e.g. "Q4_0")
+        # This code handles both cases by constructing the appropriate filename
+        checkpoint, variant = model_config.checkpoint.split(":")
+        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
+        variant_name = (
+            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
+        )
+
+        # If there is a mmproj file, add it to the patterns
+        expected_files = {"variant": variant_name}
+        if model_config.mmproj:
+            expected_files["mmproj"] = model_config.mmproj
+
+        # Download the files
+        snapshot_folder = huggingface_hub.snapshot_download(
+            repo_id=checkpoint,
+            allow_patterns=list(expected_files.values()),
         )
 
+        # Ensure we downloaded all expected files while creating a dict of the downloaded files
+        snapshot_files = {}
+        for file in expected_files:
+            snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
+            if expected_files[file] not in os.listdir(snapshot_folder):
+                raise ValueError(
+                    f"Hugging Face snapshot download for {model_config.checkpoint} "
+                    f"expected file {expected_files[file]} not found in {snapshot_folder}"
+                )
+
+        # Return a dict that points to the snapshot path of the downloaded GGUF files
+        return snapshot_files
+
     def download_models(self, models: list[str]):
         """
         Downloads the specified models from Hugging Face.
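To make the filename construction concrete, here is a minimal standalone sketch of the same logic; the helper name gguf_filename is hypothetical, and the example checkpoints mirror entries from server_models.json further down:

```python
def gguf_filename(checkpoint_field: str) -> str:
    # Split "repo_id:variant" into the Hugging Face repo and the variant part
    checkpoint, variant = checkpoint_field.split(":")
    # "unsloth/Qwen3-8B-GGUF" -> "Qwen3-8B"
    hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
    # A bare quantization variant expands to "<base>-<variant>.gguf";
    # a full filename passes through unchanged
    return variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"


assert gguf_filename("unsloth/Qwen3-8B-GGUF:Q4_1") == "Qwen3-8B-Q4_1.gguf"
assert gguf_filename("ggml-org/gemma-3-4b-it-GGUF:mmproj-model-f16.gguf") == "mmproj-model-f16.gguf"
```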
@@ -88,7 +118,8 @@
             print(f"Downloading {model} ({checkpoint})")
 
             if "gguf" in checkpoint.lower():
-                self.
+                model_config = LoadConfig(**self.supported_models[model])
+                self.download_gguf(model_config)
             else:
                 huggingface_hub.snapshot_download(repo_id=checkpoint)
 
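For GGUF checkpoints, download_models now builds a LoadConfig from the registry entry before delegating; a rough sketch of that path, assuming supported_models maps names to dicts shaped like the server_models.json entries below:

```python
from lemonade_server.pydantic_models import LoadConfig

# Hypothetical in-memory stand-in for self.supported_models
supported_models = {
    "Qwen3-8B-GGUF": {
        "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
        "recipe": "llamacpp",
        "reasoning": True,
    },
}

model_config = LoadConfig(**supported_models["Qwen3-8B-GGUF"])
# model_config.checkpoint -> "unsloth/Qwen3-8B-GGUF:Q4_1"
```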
@@ -97,9 +128,11 @@
         Returns a filtered dict of models that are enabled by the
         current environment.
         """
+        installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
+
         hybrid_installed = (
-            "onnxruntime-vitisai" in
-            and "onnxruntime-genai-directml-ryzenai" in
+            "onnxruntime-vitisai" in installed_packages
+            and "onnxruntime-genai-directml-ryzenai" in installed_packages
         )
         filtered = {}
         for model, value in models.items():
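The package probing that replaces the deleted import is plain importlib.metadata, so it can be exercised on its own; a minimal sketch, using the two distribution names checked in the diff:

```python
from importlib.metadata import distributions

# Lowercased names of every installed distribution in the environment
installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}

# Hybrid models stay enabled only when both Ryzen AI packages are present
hybrid_installed = (
    "onnxruntime-vitisai" in installed_packages
    and "onnxruntime-genai-directml-ryzenai" in installed_packages
)
print(f"hybrid backend available: {hybrid_installed}")
```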
{lemonade/tools/server → lemonade_server}/pydantic_models.py
CHANGED
@@ -24,6 +24,8 @@ class LoadConfig(BaseModel):
     max_prompt_length: Optional[int] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates which Multimodal Projector (mmproj) file to use
+    mmproj: Optional[str] = None
 
 
 class CompletionRequest(BaseModel):
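With the new field in place, a multimodal registry entry validates straight into a LoadConfig; a minimal sketch, assuming checkpoint and recipe are the only required fields:

```python
from lemonade_server.pydantic_models import LoadConfig

# Fields mirror the "Gemma-3-4b-it-GGUF" entry added to server_models.json below
config = LoadConfig(
    checkpoint="ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
    mmproj="mmproj-model-f16.gguf",
    recipe="llamacpp",
    reasoning=False,
)
print(config.mmproj)  # mmproj-model-f16.gguf
```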
lemonade_server/server_models.json
CHANGED
@@ -9,13 +9,13 @@
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
         "reasoning": false,
-        "suggested":
+        "suggested": false
     },
     "Llama-3.2-3B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
         "reasoning": false,
-        "suggested":
+        "suggested": false
     },
     "Phi-3-Mini-Instruct-CPU": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
@@ -103,6 +103,13 @@
         "max_prompt_length": 2000,
         "suggested": true
     },
+    "Llama-xLAM-2-8b-fc-r-Hybrid": {
+        "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "reasoning": false,
+        "max_prompt_length": 2000,
+        "suggested": true
+    },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
@@ -158,7 +165,7 @@
         "suggested": true
     },
     "Qwen3-8B-GGUF": {
-        "checkpoint": "unsloth/Qwen3-8B-GGUF:
+        "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "reasoning": true,
         "suggested": true
@@ -180,5 +187,19 @@
         "recipe": "llamacpp",
         "reasoning": true,
         "suggested": true
+    },
+    "Gemma-3-4b-it-GGUF": {
+        "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+        "mmproj": "mmproj-model-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
+    },
+    "Qwen2.5-VL-7B-Instruct": {
+        "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
+        "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
     }
-}
+}
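End to end, pulling one of the new multimodal models goes through download_gguf, which returns local paths for both the weights and the projector. A rough usage sketch (assumes ModelManager takes no constructor arguments, which this diff does not show):

```python
from lemonade_server.model_manager import ModelManager
from lemonade_server.pydantic_models import LoadConfig

config = LoadConfig(
    checkpoint="ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
    mmproj="mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
    recipe="llamacpp",
)

paths = ModelManager().download_gguf(config)
# paths["variant"] -> local path to Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf
# paths["mmproj"]  -> local path to mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf
```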
Files without changes: WHEEL, entry_points.txt, licenses/LICENSE, licenses/NOTICE.md, top_level.txt