lemonade-sdk 7.0.1__py3-none-any.whl → 7.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,7 +1,8 @@
 import json
 import os
 import huggingface_hub
-import pkg_resources
+from importlib.metadata import distributions
+from lemonade_server.pydantic_models import LoadConfig
 
 
 class ModelManager:
@@ -64,16 +65,45 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
-    def download_gguf(self, checkpoint) -> str:
-        # The colon after the checkpoint name indicates which
-        # specific GGUF to download
-        repo_id = checkpoint.split(":")[0]
-        pattern_to_match = f'*{checkpoint.split(":")[1]}.gguf'
-        return huggingface_hub.snapshot_download(
-            repo_id=repo_id,
-            allow_patterns=[pattern_to_match],
+    def download_gguf(self, model_config: LoadConfig) -> dict:
+        """
+        Downloads the GGUF file for the given model configuration.
+        """
+
+        # The variant parameter can be either:
+        # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
+        # 2. A quantization variant (e.g. "Q4_0")
+        # This code handles both cases by constructing the appropriate filename
+        checkpoint, variant = model_config.checkpoint.split(":")
+        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
+        variant_name = (
+            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
+        )
+
+        # If there is a mmproj file, add it to the patterns
+        expected_files = {"variant": variant_name}
+        if model_config.mmproj:
+            expected_files["mmproj"] = model_config.mmproj
+
+        # Download the files
+        snapshot_folder = huggingface_hub.snapshot_download(
+            repo_id=checkpoint,
+            allow_patterns=list(expected_files.values()),
         )
 
+        # Ensure we downloaded all expected files while creating a dict of the downloaded files
+        snapshot_files = {}
+        for file in expected_files:
+            snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
+            if expected_files[file] not in os.listdir(snapshot_folder):
+                raise ValueError(
+                    f"Hugging Face snapshot download for {model_config.checkpoint} "
+                    f"expected file {expected_files[file]} not found in {snapshot_folder}"
+                )
+
+        # Return a dict that points to the snapshot path of the downloaded GGUF files
+        return snapshot_files
+
     def download_models(self, models: list[str]):
         """
         Downloads the specified models from Hugging Face.
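For reference, a minimal standalone sketch of the variant-resolution rule introduced above; resolve_gguf_filename is a hypothetical helper name used only for illustration, restating the two cases handled in download_gguf:

    def resolve_gguf_filename(checkpoint_field: str) -> str:
        # checkpoint_field follows the "repo_id:variant" convention, e.g. "unsloth/Qwen3-8B-GGUF:Q4_1"
        repo_id, variant = checkpoint_field.split(":")
        hf_base_name = repo_id.split("/")[-1].replace("-GGUF", "")
        # A bare quantization tag is expanded; a full .gguf filename is kept as-is
        return variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"

    # resolve_gguf_filename("unsloth/Qwen3-8B-GGUF:Q4_1") -> "Qwen3-8B-Q4_1.gguf"
    # resolve_gguf_filename("ggml-org/gemma-3-4b-it-GGUF:mmproj-model-f16.gguf") -> "mmproj-model-f16.gguf"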
@@ -88,7 +118,8 @@ class ModelManager:
             print(f"Downloading {model} ({checkpoint})")
 
             if "gguf" in checkpoint.lower():
-                self.download_gguf(checkpoint)
+                model_config = LoadConfig(**self.supported_models[model])
+                self.download_gguf(model_config)
             else:
                 huggingface_hub.snapshot_download(repo_id=checkpoint)
 
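As a rough usage sketch of the new call path (assuming LoadConfig accepts or ignores extra catalog keys such as "recipe", which this diff does not show), download_gguf now receives the whole config rather than a bare checkpoint string:

    entry = {
        "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
        "mmproj": "mmproj-model-f16.gguf",
        "recipe": "llamacpp",
        "reasoning": False,
    }
    model_config = LoadConfig(**entry)
    # `manager` is a hypothetical ModelManager instance; the return value is a dict
    # mapping "variant" (and "mmproj", if set) to paths inside the snapshot folder
    files = manager.download_gguf(model_config)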
@@ -97,9 +128,11 @@ class ModelManager:
         Returns a filtered dict of models that are enabled by the
         current environment.
         """
+        installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
+
         hybrid_installed = (
-            "onnxruntime-vitisai" in pkg_resources.working_set.by_key
-            and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
+            "onnxruntime-vitisai" in installed_packages
+            and "onnxruntime-genai-directml-ryzenai" in installed_packages
         )
         filtered = {}
         for model, value in models.items():
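A minimal sketch of the dependency-detection change, for context: pkg_resources (now deprecated) exposed installed distributions via working_set.by_key, and the importlib.metadata replacement builds a roughly equivalent set of lowercased distribution names (installed_package_names is a hypothetical helper, not part of the package):

    from importlib.metadata import distributions

    def installed_package_names() -> set[str]:
        # Lowercased names, loosely analogous to the keys of pkg_resources.working_set.by_key
        return {dist.metadata["Name"].lower() for dist in distributions()}

    names = installed_package_names()
    hybrid_installed = (
        "onnxruntime-vitisai" in names
        and "onnxruntime-genai-directml-ryzenai" in names
    )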
@@ -24,6 +24,8 @@ class LoadConfig(BaseModel):
     max_prompt_length: Optional[int] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates which Multimodal Projector (mmproj) file to use
+    mmproj: Optional[str] = None
 
 
 class CompletionRequest(BaseModel):
@@ -9,13 +9,13 @@
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
         "reasoning": false,
-        "suggested": true
+        "suggested": false
     },
     "Llama-3.2-3B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
         "reasoning": false,
-        "suggested": true
+        "suggested": false
     },
     "Phi-3-Mini-Instruct-CPU": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
@@ -103,6 +103,13 @@
         "max_prompt_length": 2000,
         "suggested": true
     },
+    "Llama-xLAM-2-8b-fc-r-Hybrid": {
+        "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "reasoning": false,
+        "max_prompt_length": 2000,
+        "suggested": true
+    },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
@@ -158,7 +165,7 @@
         "suggested": true
     },
     "Qwen3-8B-GGUF": {
-        "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_0",
+        "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "reasoning": true,
         "suggested": true
@@ -180,5 +187,19 @@
         "recipe": "llamacpp",
         "reasoning": true,
         "suggested": true
+    },
+    "Gemma-3-4b-it-GGUF": {
+        "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+        "mmproj": "mmproj-model-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
+    },
+    "Qwen2.5-VL-7B-Instruct": {
+        "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
+        "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
     }
-}
+}
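Tying the new catalog entries back to the code above: for the Gemma-3-4b-it-GGUF entry, the download_gguf logic in this diff would resolve the expected files roughly as follows (a sketch, not output from the package):

    # checkpoint "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M" with mmproj "mmproj-model-f16.gguf"
    expected_files = {
        "variant": "gemma-3-4b-it-Q4_K_M.gguf",  # "<repo basename minus -GGUF>-<variant>.gguf"
        "mmproj": "mmproj-model-f16.gguf",
    }
    # huggingface_hub.snapshot_download(repo_id="ggml-org/gemma-3-4b-it-GGUF",
    #                                   allow_patterns=list(expected_files.values()))
    # The dict returned by download_gguf maps "variant" and "mmproj" to paths in the snapshot folder.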