lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -24,22 +24,6 @@
24
24
  # In any python environment, only one set of artifacts can be installed at a time.
25
25
  # Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
26
26
  #
27
- # The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
28
- # The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
29
- #
30
- # RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
31
- # onnxruntime_genai\lib
32
- # onnxruntime_genai\wheel
33
- # onnx_utils\bin
34
- # eula\eula
35
- #
36
- # The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
37
- #
38
- # RYZEN_AI\npu\amd_oga\
39
- # bins\xclbin\stx
40
- # libs
41
- # wheels
42
- #
43
27
 
44
28
  import argparse
45
29
  import glob
@@ -56,6 +40,13 @@ import zipfile
56
40
  DEFAULT_RYZEN_AI_VERSION = "1.4.0"
57
41
  version_info_filename = "version_info.json"
58
42
 
43
+ # NPU Driver configuration
44
+ NPU_DRIVER_DOWNLOAD_URL = (
45
+ "https://account.amd.com/en/forms/downloads/"
46
+ "ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
47
+ )
48
+ REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
49
+
59
50
  lemonade_install_dir = Path(__file__).parent.parent.parent
60
51
  DEFAULT_QUARK_VERSION = "quark-0.6.0"
61
52
  DEFAULT_QUARK_DIR = os.path.join(
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
66
57
  SUPPORTED_RYZEN_AI_SERIES = ["300"]
67
58
 
68
59
  npu_install_data = {
69
- "1.3.0": {
70
- "artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
71
- "license_file": (
72
- "https://account.amd.com/content/dam/account/en/licenses/download/"
73
- "amd-end-user-license-agreement.pdf"
74
- ),
75
- "license_tag": "Beta ",
76
- },
77
60
  "1.4.0": {
78
61
  "artifacts_zipfile": (
79
62
  "https://www.xilinx.com/bin/public/openDownload?"
@@ -88,17 +71,6 @@ npu_install_data = {
88
71
  }
89
72
 
90
73
  hybrid_install_data = {
91
- "1.3.0": {
92
- "artifacts_zipfile": (
93
- "https://www.xilinx.com/bin/public/openDownload?"
94
- "filename=hybrid-llm-artifacts_1.3.0_012725.zip"
95
- ),
96
- "license_file": (
97
- "https://www.xilinx.com/bin/public/openDownload?"
98
- "filename=AMD%20End%20User%20License%20Agreement.pdf"
99
- ),
100
- "license_tag": "",
101
- },
102
74
  "1.4.0": {
103
75
  "artifacts_zipfile": (
104
76
  "https://www.xilinx.com/bin/public/openDownload?"
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
154
126
  version_info = get_ryzen_ai_version_info()
155
127
  version = version_info["version"]
156
128
  ryzen_ai_folder = get_ryzen_ai_path()
157
- if "1.3.0" in version:
158
- npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
159
- else:
160
- npu_dir = os.path.join(ryzen_ai_folder, "npu")
129
+ npu_dir = os.path.join(ryzen_ai_folder, "npu")
161
130
  if not os.path.isdir(npu_dir):
162
131
  raise RuntimeError(
163
132
  f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
171
140
  version_info = get_ryzen_ai_version_info()
172
141
  version = version_info["version"]
173
142
  ryzen_ai_folder = get_ryzen_ai_path()
174
- if "1.3.0" in version:
175
- hybrid_dir = os.path.join(
176
- ryzen_ai_folder,
177
- "hybrid",
178
- "hybrid-llm-artifacts_1.3.0_lounge",
179
- "hybrid-llm-artifacts",
180
- )
181
- else:
182
- hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
143
+ hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
183
144
  if not os.path.isdir(hybrid_dir):
184
145
  raise RuntimeError(
185
146
  f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -189,6 +150,37 @@ def get_oga_hybrid_dir():
189
150
  return hybrid_dir, version
190
151
 
191
152
 
153
+ def _get_ryzenai_version_info(device=None):
154
+ """
155
+ Centralized version detection for RyzenAI installations.
156
+ Uses lazy imports to avoid import errors when OGA is not installed.
157
+ """
158
+ try:
159
+ # Lazy import to avoid errors when OGA is not installed
160
+ from packaging.version import Version
161
+ import onnxruntime_genai as og
162
+
163
+ if Version(og.__version__) >= Version("0.7.0"):
164
+ oga_path = os.path.dirname(og.__file__)
165
+ if og.__version__ == "0.7.0.2":
166
+ return "1.5.0", oga_path
167
+ else:
168
+ return "1.4.0", oga_path
169
+ else:
170
+ if device == "npu":
171
+ oga_path, version = get_oga_npu_dir()
172
+ else:
173
+ oga_path, version = get_oga_hybrid_dir()
174
+ return version, oga_path
175
+ except ImportError as e:
176
+ raise ImportError(
177
+ f"{e}\n Please install lemonade-sdk with "
178
+ "one of the oga extras, for example:\n"
179
+ "pip install lemonade-sdk[dev,oga-cpu]\n"
180
+ "See https://lemonade_server.ai/install_options.html for details"
181
+ ) from e
182
+
183
+
192
184
  def download_lfs_file(token, file, output_filename):
193
185
  """Downloads a file from LFS"""
194
186
  import requests
@@ -426,8 +418,6 @@ class Install:
426
418
  "npu",
427
419
  "hybrid",
428
420
  "unified",
429
- "npu-1.3.0",
430
- "hybrid-1.3.0",
431
421
  "npu-1.4.0",
432
422
  "hybrid-1.4.0",
433
423
  "unified-1.4.0",
@@ -524,25 +514,14 @@ class Install:
524
514
  # Install all whl files in the specified wheels folder
525
515
  if wheels_full_path is not None:
526
516
  print(f"\nInstalling wheels from {wheels_full_path}\n")
527
- if version == "1.3.0":
528
- # Install one wheel file at a time (1.3.0 npu build only works this way)
529
- for file in os.listdir(wheels_full_path):
530
- if file.endswith(".whl"):
531
- install_cmd = (
532
- f"{sys.executable} -m pip install "
533
- f"{os.path.join(wheels_full_path, file)}"
534
- )
535
- print(f"\nInstalling {file} with command {install_cmd}\n")
536
- subprocess.run(install_cmd, check=True, shell=True)
537
- else:
538
- # Install all the wheel files together, allowing pip to work out the dependencies
539
- wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
540
- install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
541
- subprocess.run(
542
- install_cmd,
543
- check=True,
544
- shell=True,
545
- )
517
+ # Install all the wheel files together, allowing pip to work out the dependencies
518
+ wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
519
+ install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
520
+ subprocess.run(
521
+ install_cmd,
522
+ check=True,
523
+ shell=True,
524
+ )
546
525
 
547
526
  # Delete the zip file
548
527
  print(f"\nCleaning up, removing {archive_file_path}\n")
@@ -611,10 +590,7 @@ class Install:
611
590
  license_file = npu_install_data[version].get("license_file", None)
612
591
  license_tag = npu_install_data[version].get("license_tag", None)
613
592
  install_dir = os.path.join(ryzen_ai_folder, "npu")
614
- if version == "1.3.0":
615
- wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
616
- else:
617
- wheels_full_path = os.path.join(install_dir, "wheels")
593
+ wheels_full_path = os.path.join(install_dir, "wheels")
618
594
 
619
595
  if license_file:
620
596
  Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -641,17 +617,7 @@ class Install:
641
617
  license_file = hybrid_install_data[version].get("license_file", None)
642
618
  license_tag = hybrid_install_data[version].get("license_tag", None)
643
619
  install_dir = os.path.join(ryzen_ai_folder, "hybrid")
644
- if version == "1.3.0":
645
- wheels_full_path = os.path.join(
646
- ryzen_ai_folder,
647
- "hybrid",
648
- "hybrid-llm-artifacts_1.3.0_lounge",
649
- "hybrid-llm-artifacts",
650
- "onnxruntime_genai",
651
- "wheel",
652
- )
653
- else:
654
- wheels_full_path = os.path.join(install_dir, "wheels")
620
+ wheels_full_path = os.path.join(install_dir, "wheels")
655
621
 
656
622
  if license_file:
657
623
  Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -669,6 +635,21 @@ class Install:
669
635
  # Check if the processor is supported before proceeding
670
636
  check_ryzen_ai_processor()
671
637
 
638
+ warning_msg = (
639
+ "\n" + "=" * 80 + "\n"
640
+ "WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
641
+ + "=" * 80
642
+ + "\n"
643
+ "Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
644
+ "For new installations, consider using:\n\n"
645
+ "pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
646
+ "This legacy installation method (lemonade-install --ryzenai) is still\n"
647
+ "supported for version 1.4.0, but may be deprecated in future releases.\n"
648
+ + "=" * 80
649
+ + "\n"
650
+ )
651
+ print(warning_msg)
652
+
672
653
  # Delete any previous Ryzen AI installation in this environment
673
654
  ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
674
655
  if os.path.exists(ryzen_ai_folder):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.0.5
3
+ Version: 8.1.0
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.13
@@ -22,16 +22,15 @@ Requires-Dist: pytz
22
22
  Requires-Dist: zstandard
23
23
  Requires-Dist: fastapi
24
24
  Requires-Dist: uvicorn[standard]
25
- Requires-Dist: openai>=1.81.0
26
- Requires-Dist: transformers<=4.51.3
25
+ Requires-Dist: openai<1.97.1,>=1.81.0
26
+ Requires-Dist: transformers<=4.53.2
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
29
  Requires-Dist: sentencepiece
30
30
  Requires-Dist: huggingface-hub==0.33.0
31
- Provides-Extra: oga-hybrid
32
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
31
+ Provides-Extra: oga-ryzenai
32
+ Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
33
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
35
34
  Provides-Extra: oga-cpu
36
35
  Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
36
  Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
43
42
  Requires-Dist: matplotlib; extra == "dev"
44
43
  Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
44
  Requires-Dist: lm-eval[api]; extra == "dev"
45
+ Provides-Extra: oga-hybrid
46
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
47
+ Provides-Extra: oga-unified
48
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
46
49
  Provides-Extra: oga-hybrid-minimal
47
- Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
50
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
48
51
  Provides-Extra: oga-cpu-minimal
49
52
  Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
53
+ Provides-Extra: oga-npu-minimal
54
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
50
55
  Provides-Extra: llm
51
56
  Requires-Dist: lemonade-sdk[dev]; extra == "llm"
52
57
  Provides-Extra: llm-oga-cpu
53
58
  Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
59
+ Provides-Extra: llm-oga-npu
60
+ Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
61
+ Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
62
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
63
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
64
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
65
+ Provides-Extra: llm-oga-hybrid
66
+ Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
67
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
68
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
69
+ Provides-Extra: llm-oga-unified
70
+ Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
54
71
  Provides-Extra: llm-oga-igpu
55
72
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
56
73
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
78
  Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
79
  Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
80
  Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
64
- Provides-Extra: llm-oga-npu
65
- Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
66
- Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
67
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
68
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
69
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
70
- Provides-Extra: llm-oga-hybrid
71
- Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
72
- Provides-Extra: llm-oga-unified
73
- Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
74
81
  Dynamic: author-email
75
82
  Dynamic: description
76
83
  Dynamic: description-content-type
@@ -174,7 +181,7 @@ lemonade-server list
174
181
 
175
182
  ## Model Library
176
183
 
177
- Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
184
+ Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
178
185
 
179
186
  You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
180
187
  <p align="center">
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
263
270
  print(completion.choices[0].message.content)
264
271
  ```
265
272
 
266
- For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
273
+ For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
267
274
 
268
275
  ## Beyond an LLM Server
269
276
 
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
272
279
  - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
273
280
  - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
274
281
 
282
+ ## FAQ
283
+
284
+ To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
285
+
275
286
  ## Contributing
276
287
 
277
288
  We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -284,7 +295,7 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
284
295
 
285
296
  ## Maintainers
286
297
 
287
- This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), email [lemonade@amd.com](mailto:lemonade@amd.com), or join our [Discord](https://discord.gg/5xXzkMu8Zk).
298
+ This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
288
299
 
289
300
  ## License
290
301
 
@@ -1,17 +1,17 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
2
  lemonade/api.py,sha256=kGz8N_9TuN3peFG8fES0odN0bWR9itLNomlR-FC2z8k,5515
3
- lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
3
+ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
4
4
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=obOXkQD52zgzH-mM2spS6LQ-gEWkuaiGpNTM_ISH0D8,22
7
+ lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
11
  lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
12
  lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
13
  lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
14
- lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
14
+ lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
15
15
  lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
16
16
  lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
17
17
  lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
@@ -21,51 +21,52 @@ lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2E
21
21
  lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
22
22
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
23
23
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
24
- lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
+ lemonade/tools/adapter.py,sha256=Ex63Y1SPCOSV4M_QtzEn3YVd39d3yew0lpmEFgp8aH4,3169
25
25
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
26
26
  lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
27
27
  lemonade/tools/management_tools.py,sha256=U8GaJnjdXyQ9sw8UxBQMc7glpaLciaVphASaQS4kJsA,10202
28
28
  lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
29
29
  lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
30
- lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
+ lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
31
31
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
32
32
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
33
33
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
34
- lemonade/tools/huggingface/utils.py,sha256=xybIWOEXHaMuw-nAEu3aITdvZSHcGKgZ9kFS5mIWcEg,13873
35
- lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPdv3Q,5946
36
- lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
34
+ lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
35
+ lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
36
+ lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
37
+ lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
37
38
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
39
- lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
40
- lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
39
+ lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
40
+ lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
41
+ lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
41
42
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
43
  lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
43
44
  lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
44
45
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
46
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
46
- lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
47
+ lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
47
48
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
49
- lemonade/tools/server/serve.py,sha256=Pp_w4iuRMkpJLF-XrTsBIBrSNBQIOl8PRZC_Cj4URnU,57334
49
+ lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
50
+ lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
50
51
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
51
- lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
52
+ lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
52
53
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
53
54
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
54
- lemonade/tools/server/static/styles.css,sha256=jXFPIHPrhRz_CJyRJrYusAECSDTO00sKUu7ajrQgFuA,24655
55
- lemonade/tools/server/static/webapp.html,sha256=tmwASvULb3d2_NfHEH9rKbEEJl3D7ygXjaCLVYkyWbg,35969
55
+ lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
56
+ lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
56
57
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
57
58
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
58
59
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
59
60
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
60
- lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
61
- lemonade_sdk-8.0.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
62
- lemonade_sdk-8.0.5.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
63
- lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
64
- lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
65
- lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
66
- lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
67
- lemonade_sdk-8.0.5.dist-info/METADATA,sha256=e2w0jPyEnyk-SeLAbYZgeGldq-2CQHm9Hly_mQgZ8uo,15224
68
- lemonade_sdk-8.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
- lemonade_sdk-8.0.5.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
70
- lemonade_sdk-8.0.5.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
71
- lemonade_sdk-8.0.5.dist-info/RECORD,,
61
+ lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
62
+ lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
63
+ lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
64
+ lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
65
+ lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
66
+ lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
67
+ lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
68
+ lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
69
+ lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
70
+ lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
71
+ lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
72
+ lemonade_sdk-8.1.0.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -57,7 +57,7 @@ def serve(
57
57
  log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
58
58
 
59
59
  # Hidden environment variable to enable input truncation (experimental feature)
60
- truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
60
+ truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
61
61
 
62
62
  # Start the server
63
63
  serve_kwargs = {
@@ -6,31 +6,14 @@ import huggingface_hub
6
6
  from importlib.metadata import distributions
7
7
  from lemonade_server.pydantic_models import PullConfig
8
8
  from lemonade.cache import DEFAULT_CACHE_DIR
9
+ from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
10
+ from lemonade.common.network import custom_snapshot_download
9
11
 
10
12
  USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
11
13
 
12
14
 
13
15
  class ModelManager:
14
16
 
15
- @staticmethod
16
- def parse_checkpoint(checkpoint: str) -> tuple[str, str | None]:
17
- """
18
- Parse a checkpoint string that may contain a variant separated by a colon.
19
-
20
- For GGUF models, the format is "repository:variant" (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0").
21
- For other models, there is no variant.
22
-
23
- Args:
24
- checkpoint: The checkpoint string, potentially with variant
25
-
26
- Returns:
27
- tuple: (base_checkpoint, variant) where variant is None if no colon is present
28
- """
29
- if ":" in checkpoint:
30
- base_checkpoint, variant = checkpoint.split(":", 1)
31
- return base_checkpoint, variant
32
- return checkpoint, None
33
-
34
17
  @property
35
18
  def supported_models(self) -> dict:
36
19
  """
@@ -98,7 +81,7 @@ class ModelManager:
98
81
  downloaded_models = {}
99
82
  downloaded_checkpoints = self.downloaded_hf_checkpoints
100
83
  for model in self.supported_models:
101
- base_checkpoint = self.parse_checkpoint(
84
+ base_checkpoint = parse_checkpoint(
102
85
  self.supported_models[model]["checkpoint"]
103
86
  )[0]
104
87
  if base_checkpoint in downloaded_checkpoints:
@@ -113,132 +96,6 @@ class ModelManager:
113
96
  """
114
97
  return self.filter_models_by_backend(self.downloaded_models)
115
98
 
116
- def identify_gguf_models(
117
- self, checkpoint: str, variant: str, mmproj: str
118
- ) -> tuple[dict, list[str]]:
119
- """
120
- Identifies the GGUF model files in the repository that match the variant.
121
- """
122
-
123
- hint = """
124
- The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
125
-
126
- The VARIANT format can be one of several types:
127
- 1. Full filename: exact file to download
128
- 2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
129
- 3. Quantization variant: find a single file ending with the variant name (case insensitive)
130
- 4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
131
-
132
- Examples:
133
- - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
134
- - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
135
- - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
136
- - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
137
- """
138
-
139
- repo_files = huggingface_hub.list_repo_files(checkpoint)
140
- sharded_files = []
141
-
142
- # (case 1) If variant ends in .gguf, use it directly
143
- if variant and variant.endswith(".gguf"):
144
- variant_name = variant
145
- if variant_name not in repo_files:
146
- raise ValueError(
147
- f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
148
- )
149
- # (case 2) If no variant is provided, get the first .gguf file in the repository
150
- elif variant is None:
151
- all_variants = [
152
- f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
153
- ]
154
- if len(all_variants) == 0:
155
- raise ValueError(
156
- f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
157
- )
158
- variant_name = all_variants[0]
159
- else:
160
- # (case 3) Find a single file ending with the variant name (case insensitive)
161
- end_with_variant = [
162
- f
163
- for f in repo_files
164
- if f.lower().endswith(f"{variant}.gguf".lower())
165
- and "mmproj" not in f.lower()
166
- ]
167
- if len(end_with_variant) == 1:
168
- variant_name = end_with_variant[0]
169
- elif len(end_with_variant) > 1:
170
- raise ValueError(
171
- f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
172
- )
173
- # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
174
- else:
175
- sharded_files = [
176
- f
177
- for f in repo_files
178
- if f.endswith(".gguf")
179
- and f.lower().startswith(f"{variant}/".lower())
180
- ]
181
-
182
- if not sharded_files:
183
- raise ValueError(
184
- f"No .gguf files found for variant {variant}. {hint}"
185
- )
186
-
187
- # Sort to ensure consistent ordering
188
- sharded_files.sort()
189
-
190
- # Use first file as primary (this is how llamacpp handles it)
191
- variant_name = sharded_files[0]
192
-
193
- core_files = {"variant": variant_name}
194
-
195
- # If there is a mmproj file, add it to the patterns
196
- if mmproj:
197
- if mmproj not in repo_files:
198
- raise ValueError(
199
- f"The provided mmproj file {mmproj} was not found in {checkpoint}."
200
- )
201
- core_files["mmproj"] = mmproj
202
-
203
- return core_files, sharded_files
204
-
205
- def download_gguf(self, model_config: PullConfig) -> dict:
206
- """
207
- Downloads the GGUF file for the given model configuration.
208
-
209
- For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
210
- will be downloaded but only the first file will be returned for loading.
211
- """
212
-
213
- # This code handles all cases by constructing the appropriate filename or pattern
214
- checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
215
-
216
- # Identify the GGUF model files in the repository that match the variant
217
- core_files, sharded_files = self.identify_gguf_models(
218
- checkpoint, variant, model_config.mmproj
219
- )
220
-
221
- # Download the files
222
- snapshot_folder = huggingface_hub.snapshot_download(
223
- repo_id=checkpoint,
224
- allow_patterns=list(core_files.values()) + sharded_files,
225
- )
226
-
227
- # Ensure we downloaded all expected files
228
- for file in list(core_files.values()) + sharded_files:
229
- expected_path = os.path.join(snapshot_folder, file)
230
- if not os.path.exists(expected_path):
231
- raise ValueError(
232
- f"Hugging Face snapshot download for {model_config.checkpoint} "
233
- f"expected file {file} not found at {expected_path}"
234
- )
235
-
236
- # Return a dict of the full path of the core GGUF files
237
- return {
238
- file_name: os.path.join(snapshot_folder, file_path)
239
- for file_name, file_path in core_files.items()
240
- }
241
-
242
99
  def download_models(
243
100
  self,
244
101
  models: list[str],
@@ -317,9 +174,9 @@ class ModelManager:
317
174
  print(f"Downloading {model} ({checkpoint_to_download})")
318
175
 
319
176
  if "gguf" in checkpoint_to_download.lower():
320
- self.download_gguf(gguf_model_config)
177
+ download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
321
178
  else:
322
- huggingface_hub.snapshot_download(repo_id=checkpoint_to_download)
179
+ custom_snapshot_download(checkpoint_to_download)
323
180
 
324
181
  # Register the model in user_models.json, creating that file if needed
325
182
  # We do this registration after the download so that we don't register
@@ -373,12 +230,12 @@ class ModelManager:
373
230
  print(f"Deleting {model_name} ({checkpoint})")
374
231
 
375
232
  # Handle GGUF models that have the format "checkpoint:variant"
376
- base_checkpoint = self.parse_checkpoint(checkpoint)[0]
233
+ base_checkpoint = parse_checkpoint(checkpoint)[0]
377
234
 
378
235
  try:
379
236
  # Get the local path using snapshot_download with local_files_only=True
380
- snapshot_path = huggingface_hub.snapshot_download(
381
- repo_id=base_checkpoint, local_files_only=True
237
+ snapshot_path = custom_snapshot_download(
238
+ base_checkpoint, local_files_only=True
382
239
  )
383
240
 
384
241
  # Navigate up to the model directory (parent of snapshots directory)