lemonade-sdk 8.0.2__py3-none-any.whl → 8.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; see the associated advisory on the package registry page for more details.

@@ -1,10 +1,10 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
2
  lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
3
3
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
- lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
4
+ lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=hTIZ_8cc-ggqcFeOYQQKOHudFQCQNQlM4ZltuYIIjD4,22
7
+ lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,16 +17,16 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
17
17
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
18
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
19
  lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
20
- lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
20
+ lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
21
21
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
22
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
23
  lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
24
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
- lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
25
+ lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
26
26
  lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
- lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
- lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
29
- lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
27
+ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
28
+ lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
29
+ lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
30
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
31
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
32
32
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
35
35
  lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
36
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
- lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
38
+ lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
39
39
  lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
40
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
- lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
41
+ lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
42
+ lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
43
43
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
- lemonade/tools/report/table.py,sha256=VkTv5Vd0HOXudEthCBnFMrWK73Dm2AQP2_B83vEKBzI,25129
45
+ lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
46
46
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
48
- lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
47
+ lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
48
+ lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
49
49
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
- lemonade/tools/server/tray.py,sha256=OI2uCncs8UgnYFLCKHHXq06RETO2RFEcn4xLzMq-q_c,16675
50
+ lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
51
51
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
52
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
- lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
- lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
53
+ lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
54
+ lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
55
55
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
56
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
57
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
58
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
59
  lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
- lemonade_sdk-8.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
- lemonade_sdk-8.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
60
+ lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
62
  lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
63
- lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
64
- lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
65
- lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
66
- lemonade_sdk-8.0.2.dist-info/METADATA,sha256=hS5Xn5Pjq0RbdLlhedz3HQMCvkRrMWFoAI0Mao4cHwg,8225
67
- lemonade_sdk-8.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- lemonade_sdk-8.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
- lemonade_sdk-8.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
- lemonade_sdk-8.0.2.dist-info/RECORD,,
63
+ lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
64
+ lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
65
+ lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
66
+ lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
67
+ lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.4.dist-info/RECORD,,
@@ -54,6 +54,17 @@ class ModelManager:
54
54
  for model_name, model_info in user_models.items()
55
55
  }
56
56
 
57
+ # Backwards compatibility for user models that were created before version 8.0.4
58
+ # "reasoning" was a boolean, but as of 8.0.4 it became a label
59
+ for _, model_info in user_models.items():
60
+ if "reasoning" in model_info:
61
+ model_info["labels"] = (
62
+ ["reasoning"]
63
+ if not model_info["labels"]
64
+ else model_info["labels"] + ["reasoning"]
65
+ )
66
+ del model_info["reasoning"]
67
+
57
68
  models.update(user_models)
58
69
 
59
70
  # Add the model name as a key in each entry, to make it easier
@@ -102,57 +113,131 @@ class ModelManager:
102
113
  """
103
114
  return self.filter_models_by_backend(self.downloaded_models)
104
115
 
116
+ def identify_gguf_models(
117
+ self, checkpoint: str, variant: str, mmproj: str
118
+ ) -> tuple[dict, list[str]]:
119
+ """
120
+ Identifies the GGUF model files in the repository that match the variant.
121
+ """
122
+
123
+ hint = """
124
+ The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
125
+
126
+ The VARIANT format can be one of several types:
127
+ 1. Full filename: exact file to download
128
+ 2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
129
+ 3. Quantization variant: find a single file ending with the variant name (case insensitive)
130
+ 4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
131
+
132
+ Examples:
133
+ - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
134
+ - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
135
+ - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
136
+ - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
137
+ """
138
+
139
+ repo_files = huggingface_hub.list_repo_files(checkpoint)
140
+ sharded_files = []
141
+
142
+ # (case 1) If variant ends in .gguf, use it directly
143
+ if variant and variant.endswith(".gguf"):
144
+ variant_name = variant
145
+ if variant_name not in repo_files:
146
+ raise ValueError(
147
+ f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
148
+ )
149
+ # (case 2) If no variant is provided, get the first .gguf file in the repository
150
+ elif variant is None:
151
+ all_variants = [
152
+ f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
153
+ ]
154
+ if len(all_variants) == 0:
155
+ raise ValueError(
156
+ f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
157
+ )
158
+ variant_name = all_variants[0]
159
+ else:
160
+ # (case 3) Find a single file ending with the variant name (case insensitive)
161
+ end_with_variant = [
162
+ f
163
+ for f in repo_files
164
+ if f.lower().endswith(f"{variant}.gguf".lower())
165
+ and "mmproj" not in f.lower()
166
+ ]
167
+ if len(end_with_variant) == 1:
168
+ variant_name = end_with_variant[0]
169
+ elif len(end_with_variant) > 1:
170
+ raise ValueError(
171
+ f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
172
+ )
173
+ # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
174
+ else:
175
+ sharded_files = [
176
+ f
177
+ for f in repo_files
178
+ if f.endswith(".gguf")
179
+ and f.lower().startswith(f"{variant}/".lower())
180
+ ]
181
+
182
+ if not sharded_files:
183
+ raise ValueError(
184
+ f"No .gguf files found for variant {variant}. {hint}"
185
+ )
186
+
187
+ # Sort to ensure consistent ordering
188
+ sharded_files.sort()
189
+
190
+ # Use first file as primary (this is how llamacpp handles it)
191
+ variant_name = sharded_files[0]
192
+
193
+ core_files = {"variant": variant_name}
194
+
195
+ # If there is a mmproj file, add it to the patterns
196
+ if mmproj:
197
+ if mmproj not in repo_files:
198
+ raise ValueError(
199
+ f"The provided mmproj file {mmproj} was not found in {checkpoint}."
200
+ )
201
+ core_files["mmproj"] = mmproj
202
+
203
+ return core_files, sharded_files
204
+
105
205
  def download_gguf(self, model_config: PullConfig) -> dict:
106
206
  """
107
207
  Downloads the GGUF file for the given model configuration.
208
+
209
+ For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
210
+ will be downloaded but only the first file will be returned for loading.
108
211
  """
109
212
 
110
- # The variant parameter can be either:
111
- # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
112
- # 2. A quantization variant (e.g. "Q4_0")
113
- # This code handles both cases by constructing the appropriate filename
213
+ # This code handles all cases by constructing the appropriate filename or pattern
114
214
  checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
115
- hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
116
- variant_name = (
117
- variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
118
- )
119
215
 
120
- # If there is a mmproj file, add it to the patterns
121
- expected_files = {"variant": variant_name}
122
- if model_config.mmproj:
123
- expected_files["mmproj"] = model_config.mmproj
216
+ # Identify the GGUF model files in the repository that match the variant
217
+ core_files, sharded_files = self.identify_gguf_models(
218
+ checkpoint, variant, model_config.mmproj
219
+ )
124
220
 
125
221
  # Download the files
126
222
  snapshot_folder = huggingface_hub.snapshot_download(
127
223
  repo_id=checkpoint,
128
- allow_patterns=list(expected_files.values()),
224
+ allow_patterns=list(core_files.values()) + sharded_files,
129
225
  )
130
226
 
131
- # Make sure we downloaded something
132
- # If we didn't that can indicate that no patterns from allow_patterns match
133
- # any files in the HF repo
134
- if not os.path.exists(snapshot_folder):
135
- raise ValueError(
136
- "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
137
- "Try again, providing the full filename of your target .gguf file as the variant."
138
- " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
139
- "qwen2.5-coder-3b-instruct-q4_0.gguf"
140
- )
141
-
142
- # Ensure we downloaded all expected files while creating a dict of the downloaded files
143
- snapshot_files = {}
144
- for file in expected_files:
145
- snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
146
- if expected_files[file].lower() not in [
147
- name.lower() for name in os.listdir(snapshot_folder)
148
- ]:
227
+ # Ensure we downloaded all expected files
228
+ for file in list(core_files.values()) + sharded_files:
229
+ expected_path = os.path.join(snapshot_folder, file)
230
+ if not os.path.exists(expected_path):
149
231
  raise ValueError(
150
232
  f"Hugging Face snapshot download for {model_config.checkpoint} "
151
- f"expected file {expected_files[file]} not found in {snapshot_folder}"
233
+ f"expected file {file} not found at {expected_path}"
152
234
  )
153
235
 
154
- # Return a dict that points to the snapshot path of the downloaded GGUF files
155
- return snapshot_files
236
+ # Return a dict of the full path of the core GGUF files
237
+ return {
238
+ file_name: os.path.join(snapshot_folder, file_path)
239
+ for file_name, file_path in core_files.items()
240
+ }
156
241
 
157
242
  def download_models(
158
243
  self,
@@ -194,9 +279,8 @@ class ModelManager:
194
279
  new_user_model = {
195
280
  "checkpoint": checkpoint,
196
281
  "recipe": recipe,
197
- "reasoning": reasoning,
198
282
  "suggested": True,
199
- "labels": ["custom"],
283
+ "labels": ["custom"] + (["reasoning"] if reasoning else []),
200
284
  }
201
285
 
202
286
  if mmproj:
@@ -249,6 +333,9 @@ class ModelManager:
249
333
 
250
334
  user_models[model_name] = new_user_model
251
335
 
336
+ # Ensure the cache directory exists before writing the file
337
+ os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
338
+
252
339
  with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
253
340
  json.dump(user_models, fp=file)
254
341
 
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Union, List, Any
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
65
65
  response_format: dict | None = None
66
66
 
67
67
 
68
+ class EmbeddingsRequest(BaseModel):
69
+ """
70
+ Request model for embeddings API endpoint.
71
+
72
+ Generates embeddings for the provided input text or tokens.
73
+ """
74
+
75
+ input: Union[str, List]
76
+ model: Optional[str] = None
77
+ encoding_format: Optional[str] = "float" # "float" or "base64"
78
+
79
+
80
+ class RerankingRequest(BaseModel):
81
+ """
82
+ Request model for reranking API endpoint.
83
+
84
+ Reranks a list of documents based on their relevance to a query.
85
+ """
86
+
87
+ query: str
88
+ documents: List[str]
89
+ model: str
90
+
91
+
68
92
  class ResponsesRequest(BaseModel):
69
93
  """
70
94
  Request model for responses API endpoint.
@@ -2,197 +2,177 @@
2
2
  "Qwen2.5-0.5B-Instruct-CPU": {
3
3
  "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
4
4
  "recipe": "oga-cpu",
5
- "reasoning": false,
6
5
  "suggested": true
7
6
  },
8
7
  "Llama-3.2-1B-Instruct-CPU": {
9
8
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
10
9
  "recipe": "oga-cpu",
11
- "reasoning": false,
12
10
  "suggested": false
13
11
  },
14
12
  "Llama-3.2-3B-Instruct-CPU": {
15
13
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
16
14
  "recipe": "oga-cpu",
17
- "reasoning": false,
18
15
  "suggested": false
19
16
  },
20
17
  "Phi-3-Mini-Instruct-CPU": {
21
18
  "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
22
19
  "recipe": "oga-cpu",
23
- "reasoning": false,
24
20
  "suggested": true
25
21
  },
26
22
  "Qwen-1.5-7B-Chat-CPU": {
27
23
  "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
28
24
  "recipe": "oga-cpu",
29
- "reasoning": false,
30
25
  "suggested": true
31
26
  },
32
27
  "DeepSeek-R1-Distill-Llama-8B-CPU": {
33
28
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
34
29
  "recipe": "oga-cpu",
35
- "reasoning": true,
36
- "suggested": true
30
+ "suggested": true,
31
+ "labels": ["reasoning"]
37
32
  },
38
33
  "DeepSeek-R1-Distill-Qwen-7B-CPU": {
39
34
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
40
35
  "recipe": "oga-cpu",
41
- "reasoning": true,
42
- "suggested": true
36
+ "suggested": true,
37
+ "labels": ["reasoning"]
43
38
  },
44
39
  "Llama-3.2-1B-Instruct-Hybrid": {
45
40
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
46
41
  "recipe": "oga-hybrid",
47
- "reasoning": false,
48
42
  "max_prompt_length": 3000,
49
43
  "suggested": true
50
44
  },
51
45
  "Llama-3.2-3B-Instruct-Hybrid": {
52
46
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
53
47
  "recipe": "oga-hybrid",
54
- "reasoning": false,
55
48
  "max_prompt_length": 2000,
56
49
  "suggested": true
57
50
  },
58
51
  "Phi-3-Mini-Instruct-Hybrid": {
59
52
  "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
60
53
  "recipe": "oga-hybrid",
61
- "reasoning": false,
62
54
  "max_prompt_length": 2000,
63
55
  "suggested": true
64
56
  },
65
57
  "Phi-3.5-Mini-Instruct-Hybrid": {
66
58
  "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
67
59
  "recipe": "oga-hybrid",
68
- "reasoning": false,
69
60
  "suggested": false
70
61
  },
71
62
  "Qwen-1.5-7B-Chat-Hybrid": {
72
63
  "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
73
64
  "recipe": "oga-hybrid",
74
- "reasoning": false,
75
65
  "max_prompt_length": 3000,
76
66
  "suggested": true
77
67
  },
78
68
  "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
79
69
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
80
70
  "recipe": "oga-hybrid",
81
- "reasoning": true,
82
71
  "max_prompt_length": 2000,
83
- "suggested": true
72
+ "suggested": true,
73
+ "labels": ["reasoning"]
84
74
  },
85
75
  "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
86
76
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
87
77
  "recipe": "oga-hybrid",
88
- "reasoning": true,
89
78
  "max_prompt_length": 2000,
90
- "suggested": true
79
+ "suggested": true,
80
+ "labels": ["reasoning"]
91
81
  },
92
82
  "Mistral-7B-v0.3-Instruct-Hybrid": {
93
83
  "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
94
84
  "recipe": "oga-hybrid",
95
- "reasoning": false,
96
85
  "max_prompt_length": 2000,
97
86
  "suggested": true
98
87
  },
99
88
  "Llama-3.1-8B-Instruct-Hybrid": {
100
89
  "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
101
90
  "recipe": "oga-hybrid",
102
- "reasoning": false,
103
91
  "max_prompt_length": 2000,
104
92
  "suggested": true
105
93
  },
106
94
  "Llama-xLAM-2-8b-fc-r-Hybrid": {
107
95
  "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
108
96
  "recipe": "oga-hybrid",
109
- "reasoning": false,
110
97
  "max_prompt_length": 2000,
111
98
  "suggested": true
112
99
  },
113
100
  "Llama-3.2-1B-Instruct-DirectML": {
114
101
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
115
102
  "recipe": "oga-igpu",
116
- "reasoning": false,
117
103
  "suggested": false
118
104
  },
119
105
  "Llama-3.2-3B-Instruct-DirectML": {
120
106
  "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
121
107
  "recipe": "oga-igpu",
122
- "reasoning": false,
123
108
  "suggested": false
124
109
  },
125
110
  "Phi-3.5-Mini-Instruct-DirectML": {
126
111
  "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
127
112
  "recipe": "oga-igpu",
128
- "reasoning": false,
129
113
  "suggested": false
130
114
  },
131
115
  "Qwen-1.5-7B-Chat-DirectML": {
132
116
  "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
133
117
  "recipe": "oga-igpu",
134
- "reasoning": false,
135
118
  "suggested": false
136
119
  },
137
120
  "Mistral-7B-v0.1-Instruct-DirectML": {
138
121
  "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
139
122
  "recipe": "oga-igpu",
140
- "reasoning": false,
141
123
  "suggested": false
142
124
  },
143
125
  "Llama-3-8B-Instruct-DirectML": {
144
126
  "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
145
127
  "recipe": "oga-igpu",
146
- "reasoning": false,
147
128
  "suggested": false
148
129
  },
149
130
  "Qwen3-0.6B-GGUF": {
150
131
  "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
151
132
  "recipe": "llamacpp",
152
- "reasoning": true,
153
- "suggested": true
133
+ "suggested": true,
134
+ "labels": ["reasoning"]
154
135
  },
155
136
  "Qwen3-1.7B-GGUF": {
156
137
  "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
157
138
  "recipe": "llamacpp",
158
- "reasoning": true,
159
- "suggested": true
139
+ "suggested": true,
140
+ "labels": ["reasoning"]
160
141
  },
161
142
  "Qwen3-4B-GGUF": {
162
143
  "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
163
144
  "recipe": "llamacpp",
164
- "reasoning": true,
165
- "suggested": true
145
+ "suggested": true,
146
+ "labels": ["reasoning"]
166
147
  },
167
148
  "Qwen3-8B-GGUF": {
168
149
  "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
169
150
  "recipe": "llamacpp",
170
- "reasoning": true,
171
- "suggested": true
151
+ "suggested": true,
152
+ "labels": ["reasoning"]
172
153
  },
173
154
  "DeepSeek-Qwen3-8B-GGUF": {
174
155
  "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
175
156
  "recipe": "llamacpp",
176
- "reasoning": true,
177
- "suggested": true
157
+ "suggested": true,
158
+ "labels": ["reasoning"]
178
159
  },
179
160
  "Qwen3-14B-GGUF": {
180
161
  "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
181
162
  "recipe": "llamacpp",
182
- "reasoning": true,
183
- "suggested": true
163
+ "suggested": true,
164
+ "labels": ["reasoning"]
184
165
  },
185
166
  "Qwen3-30B-A3B-GGUF": {
186
167
  "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
187
168
  "recipe": "llamacpp",
188
- "reasoning": true,
189
- "suggested": true
169
+ "suggested": true,
170
+ "labels": ["reasoning"]
190
171
  },
191
172
  "Gemma-3-4b-it-GGUF": {
192
173
  "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
193
174
  "mmproj": "mmproj-model-f16.gguf",
194
175
  "recipe": "llamacpp",
195
- "reasoning": false,
196
176
  "suggested": true,
197
177
  "labels": ["vision"]
198
178
  },
@@ -200,8 +180,38 @@
200
180
  "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
201
181
  "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
202
182
  "recipe": "llamacpp",
203
- "reasoning": false,
204
183
  "suggested": true,
205
184
  "labels": ["vision"]
185
+ },
186
+ "Llama-4-Scout-17B-16E-Instruct-GGUF": {
187
+ "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
188
+ "mmproj": "mmproj-F16.gguf",
189
+ "recipe": "llamacpp",
190
+ "suggested": true,
191
+ "labels": ["vision"]
192
+ },
193
+ "nomic-embed-text-v1-GGUF": {
194
+ "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
195
+ "recipe": "llamacpp",
196
+ "suggested": true,
197
+ "labels": ["embeddings"]
198
+ },
199
+ "nomic-embed-text-v2-moe-GGUF": {
200
+ "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
201
+ "recipe": "llamacpp",
202
+ "suggested": true,
203
+ "labels": ["embeddings"]
204
+ },
205
+ "bge-reranker-v2-m3-GGUF": {
206
+ "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
207
+ "recipe": "llamacpp",
208
+ "suggested": true,
209
+ "labels": ["reranking"]
210
+ },
211
+ "jina-reranker-v1-tiny-en-GGUF": {
212
+ "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
213
+ "recipe": "llamacpp",
214
+ "suggested": false,
215
+ "labels": ["reranking"]
206
216
  }
207
217
  }