lemonade-sdk 8.0.2__py3-none-any.whl → 8.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lemonade-sdk might be problematic.
- lemonade/cli.py +2 -2
- lemonade/profilers/profiler.py +4 -1
- lemonade/tools/humaneval.py +1 -1
- lemonade/tools/mmlu.py +1 -1
- lemonade/tools/oga/load.py +3 -9
- lemonade/tools/perplexity.py +2 -2
- lemonade/tools/prompt.py +21 -6
- lemonade/tools/quark/quark_load.py +1 -1
- lemonade/tools/quark/quark_quantize.py +2 -2
- lemonade/tools/report/table.py +80 -0
- lemonade/tools/server/llamacpp.py +148 -16
- lemonade/tools/server/serve.py +73 -0
- lemonade/tools/server/static/styles.css +424 -4
- lemonade/tools/server/static/webapp.html +337 -38
- lemonade/tools/server/tray.py +25 -9
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/METADATA +33 -36
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD +26 -26
- lemonade_server/model_manager.py +123 -36
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +53 -43
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/top_level.txt +0 -0
{lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD CHANGED

@@ -1,10 +1,10 @@
 lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
 lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
 lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
-lemonade/cli.py,sha256=
+lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=
+lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,16 +17,16 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
 lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
 lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
 lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
-lemonade/profilers/profiler.py,sha256=
+lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
 lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
 lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
 lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
-lemonade/tools/humaneval.py,sha256=
+lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
 lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
-lemonade/tools/mmlu.py,sha256=
-lemonade/tools/perplexity.py,sha256=
-lemonade/tools/prompt.py,sha256=
+lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
+lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
+lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
 lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=
+lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
 lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/quark/quark_load.py,sha256=
-lemonade/tools/quark/quark_quantize.py,sha256=
+lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
+lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
-lemonade/tools/report/table.py,sha256=
+lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=
-lemonade/tools/server/serve.py,sha256=
+lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
+lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
-lemonade/tools/server/tray.py,sha256=
+lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=
-lemonade/tools/server/static/webapp.html,sha256=
+lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
+lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.
-lemonade_sdk-8.0.
+lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
 lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
-lemonade_server/model_manager.py,sha256=
-lemonade_server/pydantic_models.py,sha256=
-lemonade_server/server_models.json,sha256=
-lemonade_sdk-8.0.
-lemonade_sdk-8.0.
-lemonade_sdk-8.0.
-lemonade_sdk-8.0.
-lemonade_sdk-8.0.
+lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
+lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
+lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
+lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
+lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.4.dist-info/RECORD,,
lemonade_server/model_manager.py CHANGED

@@ -54,6 +54,17 @@ class ModelManager:
             for model_name, model_info in user_models.items()
         }
 
+        # Backwards compatibility for user models that were created before version 8.0.4
+        # "reasoning" was a boolean, but as of 8.0.4 it became a label
+        for _, model_info in user_models.items():
+            if "reasoning" in model_info:
+                model_info["labels"] = (
+                    ["reasoning"]
+                    if not model_info["labels"]
+                    else model_info["labels"] + ["reasoning"]
+                )
+                del model_info["reasoning"]
+
         models.update(user_models)
 
         # Add the model name as a key in each entry, to make it easier
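For reference, the backwards-compatibility branch above can be exercised in isolation. The sketch below mirrors the migration from the hunk on a made-up pre-8.0.4 user model entry; the `migrate_user_model()` helper and the sample values are illustrative, not part of the package, which runs the same transformation inline while loading user models.

```python
# Sketch of the pre-8.0.4 -> 8.0.4 user-model migration shown in the hunk above.
# migrate_user_model() and the sample entry are hypothetical.

def migrate_user_model(model_info: dict) -> dict:
    if "reasoning" in model_info:
        # The boolean field becomes a "reasoning" entry appended to "labels"
        model_info["labels"] = (
            ["reasoning"]
            if not model_info.get("labels")
            else model_info["labels"] + ["reasoning"]
        )
        del model_info["reasoning"]
    return model_info

old_entry = {
    "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
    "recipe": "llamacpp",
    "reasoning": True,
    "suggested": True,
    "labels": ["custom"],
}
print(migrate_user_model(old_entry)["labels"])  # ['custom', 'reasoning']
```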
@@ -102,57 +113,131 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
 
+    def identify_gguf_models(
+        self, checkpoint: str, variant: str, mmproj: str
+    ) -> tuple[dict, list[str]]:
+        """
+        Identifies the GGUF model files in the repository that match the variant.
+        """
+
+        hint = """
+        The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
+
+        The VARIANT format can be one of several types:
+        1. Full filename: exact file to download
+        2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
+        3. Quantization variant: find a single file ending with the variant name (case insensitive)
+        4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
+
+        Examples:
+        - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
+        - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
+        - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
+        """
+
+        repo_files = huggingface_hub.list_repo_files(checkpoint)
+        sharded_files = []
+
+        # (case 1) If variant ends in .gguf, use it directly
+        if variant and variant.endswith(".gguf"):
+            variant_name = variant
+            if variant_name not in repo_files:
+                raise ValueError(
+                    f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
+                )
+        # (case 2) If no variant is provided, get the first .gguf file in the repository
+        elif variant is None:
+            all_variants = [
+                f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
+            ]
+            if len(all_variants) == 0:
+                raise ValueError(
+                    f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
+                )
+            variant_name = all_variants[0]
+        else:
+            # (case 3) Find a single file ending with the variant name (case insensitive)
+            end_with_variant = [
+                f
+                for f in repo_files
+                if f.lower().endswith(f"{variant}.gguf".lower())
+                and "mmproj" not in f.lower()
+            ]
+            if len(end_with_variant) == 1:
+                variant_name = end_with_variant[0]
+            elif len(end_with_variant) > 1:
+                raise ValueError(
+                    f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
+                )
+            # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
+            else:
+                sharded_files = [
+                    f
+                    for f in repo_files
+                    if f.endswith(".gguf")
+                    and f.lower().startswith(f"{variant}/".lower())
+                ]
+
+                if not sharded_files:
+                    raise ValueError(
+                        f"No .gguf files found for variant {variant}. {hint}"
+                    )
+
+                # Sort to ensure consistent ordering
+                sharded_files.sort()
+
+                # Use first file as primary (this is how llamacpp handles it)
+                variant_name = sharded_files[0]
+
+        core_files = {"variant": variant_name}
+
+        # If there is a mmproj file, add it to the patterns
+        if mmproj:
+            if mmproj not in repo_files:
+                raise ValueError(
+                    f"The provided mmproj file {mmproj} was not found in {checkpoint}."
+                )
+            core_files["mmproj"] = mmproj
+
+        return core_files, sharded_files
+
     def download_gguf(self, model_config: PullConfig) -> dict:
         """
         Downloads the GGUF file for the given model configuration.
+
+        For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
+        will be downloaded but only the first file will be returned for loading.
         """
 
-        #
-        # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
-        # 2. A quantization variant (e.g. "Q4_0")
-        # This code handles both cases by constructing the appropriate filename
+        # This code handles all cases by constructing the appropriate filename or pattern
         checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
-        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
-        variant_name = (
-            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
-        )
 
-        #
-
-
-
+        # Identify the GGUF model files in the repository that match the variant
+        core_files, sharded_files = self.identify_gguf_models(
+            checkpoint, variant, model_config.mmproj
+        )
 
         # Download the files
         snapshot_folder = huggingface_hub.snapshot_download(
             repo_id=checkpoint,
-            allow_patterns=list(
+            allow_patterns=list(core_files.values()) + sharded_files,
         )
 
-        #
-
-
-
-        raise ValueError(
-            "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
-            "Try again, providing the full filename of your target .gguf file as the variant."
-            " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
-            "qwen2.5-coder-3b-instruct-q4_0.gguf"
-        )
-
-        # Ensure we downloaded all expected files while creating a dict of the downloaded files
-        snapshot_files = {}
-        for file in expected_files:
-            snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
-            if expected_files[file].lower() not in [
-                name.lower() for name in os.listdir(snapshot_folder)
-            ]:
+        # Ensure we downloaded all expected files
+        for file in list(core_files.values()) + sharded_files:
+            expected_path = os.path.join(snapshot_folder, file)
+            if not os.path.exists(expected_path):
                 raise ValueError(
                     f"Hugging Face snapshot download for {model_config.checkpoint} "
-                    f"expected file {
+                    f"expected file {file} not found at {expected_path}"
                 )
 
-        # Return a dict
-        return
+        # Return a dict of the full path of the core GGUF files
+        return {
+            file_name: os.path.join(snapshot_folder, file_path)
+            for file_name, file_path in core_files.items()
+        }
 
     def download_models(
         self,
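To make the four VARIANT cases above concrete, here is a small, standalone sketch that applies the same matching rules to a hard-coded file list instead of calling huggingface_hub.list_repo_files(); the repository contents and the `resolve_variant()` helper are hypothetical.

```python
# Standalone sketch of the CHECKPOINT:VARIANT resolution described in the hint above.
# The file list below is made up; the packaged code queries the Hugging Face repo.

def resolve_variant(repo_files: list[str], variant: str | None) -> tuple[str, list[str]]:
    """Return (primary_file, sharded_files) following cases 1-4 from the hint text."""
    # Case 1: variant is an exact .gguf filename
    if variant and variant.endswith(".gguf"):
        if variant not in repo_files:
            raise ValueError(f"File {variant} not found")
        return variant, []
    # Case 2: no variant -> first non-mmproj .gguf file
    if variant is None:
        candidates = [f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f]
        if not candidates:
            raise ValueError("No .gguf files found")
        return candidates[0], []
    # Case 3: a single file ending with the variant name (case insensitive)
    matches = [
        f
        for f in repo_files
        if f.lower().endswith(f"{variant}.gguf".lower()) and "mmproj" not in f.lower()
    ]
    if len(matches) == 1:
        return matches[0], []
    if len(matches) > 1:
        raise ValueError(f"Multiple .gguf files match {variant}")
    # Case 4: a folder of sharded files named after the variant
    shards = sorted(
        f
        for f in repo_files
        if f.endswith(".gguf") and f.lower().startswith(f"{variant}/".lower())
    )
    if not shards:
        raise ValueError(f"No .gguf files found for variant {variant}")
    return shards[0], shards


# Hypothetical repository contents
files = [
    "Qwen3-8B-GGUF-Q4_1.gguf",
    "mmproj-model-f16.gguf",
    "Q4_0/model-00001-of-00002.gguf",
    "Q4_0/model-00002-of-00002.gguf",
]
print(resolve_variant(files, "Q4_1"))
# ('Qwen3-8B-GGUF-Q4_1.gguf', [])
print(resolve_variant(files, "Q4_0"))
# ('Q4_0/model-00001-of-00002.gguf', ['Q4_0/model-00001-of-00002.gguf', 'Q4_0/model-00002-of-00002.gguf'])
```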
@@ -194,9 +279,8 @@ class ModelManager:
         new_user_model = {
             "checkpoint": checkpoint,
             "recipe": recipe,
-            "reasoning": reasoning,
             "suggested": True,
-            "labels": ["custom"],
+            "labels": ["custom"] + (["reasoning"] if reasoning else []),
         }
 
         if mmproj:
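Under the new scheme, the reasoning flag passed at registration time simply folds into the labels list. A quick illustration with hypothetical inputs:

```python
# Hypothetical registration inputs; mirrors the expression in the hunk above
checkpoint, recipe, reasoning = "unsloth/Qwen3-8B-GGUF:Q4_1", "llamacpp", True

new_user_model = {
    "checkpoint": checkpoint,
    "recipe": recipe,
    "suggested": True,
    "labels": ["custom"] + (["reasoning"] if reasoning else []),
}
print(new_user_model["labels"])  # ['custom', 'reasoning']
```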
@@ -249,6 +333,9 @@ class ModelManager:
 
         user_models[model_name] = new_user_model
 
+        # Ensure the cache directory exists before writing the file
+        os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
+
         with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
             json.dump(user_models, fp=file)
 
lemonade_server/pydantic_models.py CHANGED

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union, List, Any
 
 from pydantic import BaseModel
 
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
     response_format: dict | None = None
 
 
+class EmbeddingsRequest(BaseModel):
+    """
+    Request model for embeddings API endpoint.
+
+    Generates embeddings for the provided input text or tokens.
+    """
+
+    input: Union[str, List]
+    model: Optional[str] = None
+    encoding_format: Optional[str] = "float"  # "float" or "base64"
+
+
+class RerankingRequest(BaseModel):
+    """
+    Request model for reranking API endpoint.
+
+    Reranks a list of documents based on their relevance to a query.
+    """
+
+    query: str
+    documents: List[str]
+    model: str
+
+
 class ResponsesRequest(BaseModel):
     """
     Request model for responses API endpoint.
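To illustrate the two new request bodies, here is a minimal, self-contained sketch that constructs them with the fields defined above. The class definitions are copied from the diff, and the sample model names come from the embeddings and reranking entries added to server_models.json in this same release; the endpoint URLs are not part of this diff, so none are shown.

```python
# Sketch: constructing the new request bodies added in 8.0.4.
# Field names mirror the diff; sample model names come from server_models.json.
from typing import List, Optional, Union

from pydantic import BaseModel


class EmbeddingsRequest(BaseModel):
    input: Union[str, List]
    model: Optional[str] = None
    encoding_format: Optional[str] = "float"  # "float" or "base64"


class RerankingRequest(BaseModel):
    query: str
    documents: List[str]
    model: str


emb = EmbeddingsRequest(input="hello world", model="nomic-embed-text-v1-GGUF")
rerank = RerankingRequest(
    query="What is ONNX?",
    documents=["ONNX is a model interchange format.", "Bananas are yellow."],
    model="bge-reranker-v2-m3-GGUF",
)
print(emb.encoding_format)   # float
print(rerank.documents[0])   # ONNX is a model interchange format.
```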
lemonade_server/server_models.json CHANGED

@@ -2,197 +2,177 @@
     "Qwen2.5-0.5B-Instruct-CPU": {
         "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "reasoning": false,
         "suggested": true
     },
     "Llama-3.2-1B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "reasoning": false,
         "suggested": false
     },
     "Llama-3.2-3B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "reasoning": false,
         "suggested": false
     },
     "Phi-3-Mini-Instruct-CPU": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "reasoning": false,
         "suggested": true
     },
     "Qwen-1.5-7B-Chat-CPU": {
         "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "reasoning": false,
         "suggested": true
     },
     "DeepSeek-R1-Distill-Llama-8B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "DeepSeek-R1-Distill-Qwen-7B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Llama-3.2-1B-Instruct-Hybrid": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 3000,
         "suggested": true
     },
     "Llama-3.2-3B-Instruct-Hybrid": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 2000,
         "suggested": true
     },
     "Phi-3-Mini-Instruct-Hybrid": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 2000,
         "suggested": true
     },
     "Phi-3.5-Mini-Instruct-Hybrid": {
         "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "suggested": false
     },
     "Qwen-1.5-7B-Chat-Hybrid": {
         "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 3000,
         "suggested": true
     },
     "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": true,
         "max_prompt_length": 2000,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": true,
         "max_prompt_length": 2000,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Mistral-7B-v0.3-Instruct-Hybrid": {
         "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 2000,
         "suggested": true
     },
     "Llama-3.1-8B-Instruct-Hybrid": {
         "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 2000,
         "suggested": true
     },
     "Llama-xLAM-2-8b-fc-r-Hybrid": {
         "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "reasoning": false,
         "max_prompt_length": 2000,
         "suggested": true
     },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Llama-3.2-3B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Phi-3.5-Mini-Instruct-DirectML": {
         "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Qwen-1.5-7B-Chat-DirectML": {
         "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Mistral-7B-v0.1-Instruct-DirectML": {
         "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Llama-3-8B-Instruct-DirectML": {
         "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "reasoning": false,
         "suggested": false
     },
     "Qwen3-0.6B-GGUF": {
         "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Qwen3-1.7B-GGUF": {
         "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Qwen3-4B-GGUF": {
         "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "DeepSeek-Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Qwen3-14B-GGUF": {
         "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Qwen3-30B-A3B-GGUF": {
         "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
         "recipe": "llamacpp",
-        "reasoning": true,
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"]
     },
     "Gemma-3-4b-it-GGUF": {
         "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
         "mmproj": "mmproj-model-f16.gguf",
         "recipe": "llamacpp",
-        "reasoning": false,
         "suggested": true,
         "labels": ["vision"]
     },
@@ -200,8 +180,38 @@
         "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
         "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
         "recipe": "llamacpp",
-        "reasoning": false,
         "suggested": true,
         "labels": ["vision"]
+    },
+    "Llama-4-Scout-17B-16E-Instruct-GGUF": {
+        "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
+        "mmproj": "mmproj-F16.gguf",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["vision"]
+    },
+    "nomic-embed-text-v1-GGUF": {
+        "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["embeddings"]
+    },
+    "nomic-embed-text-v2-moe-GGUF": {
+        "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["embeddings"]
+    },
+    "bge-reranker-v2-m3-GGUF": {
+        "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["reranking"]
+    },
+    "jina-reranker-v1-tiny-en-GGUF": {
+        "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
+        "recipe": "llamacpp",
+        "suggested": false,
+        "labels": ["reranking"]
     }
 }
Files without changes:
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/WHEEL
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/entry_points.txt
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/LICENSE
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/NOTICE.md
- {lemonade_sdk-8.0.2.dist-info → lemonade_sdk-8.0.4.dist-info}/top_level.txt