lemonade-sdk 7.0.4__tar.gz → 8.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (86)
  1. {lemonade_sdk-7.0.4/src/lemonade_sdk.egg-info → lemonade_sdk-8.0.0}/PKG-INFO +84 -22
  2. lemonade_sdk-8.0.0/README.md +97 -0
  3. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/setup.py +31 -14
  4. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/api.py +3 -3
  5. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/cli.py +11 -17
  6. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/build.py +0 -47
  7. lemonade_sdk-8.0.0/src/lemonade/common/network.py +50 -0
  8. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/status.py +2 -21
  9. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/system_info.py +19 -4
  10. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/profilers/memory_tracker.py +3 -1
  11. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/accuracy.py +3 -4
  12. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/adapter.py +1 -2
  13. lemonade_sdk-7.0.4/src/lemonade/tools/huggingface_bench.py → lemonade_sdk-8.0.0/src/lemonade/tools/huggingface/bench.py +2 -87
  14. lemonade_sdk-8.0.0/src/lemonade/tools/huggingface/load.py +235 -0
  15. lemonade_sdk-7.0.4/src/lemonade/tools/huggingface_load.py → lemonade_sdk-8.0.0/src/lemonade/tools/huggingface/utils.py +87 -255
  16. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/humaneval.py +9 -3
  17. lemonade_sdk-7.0.4/src/lemonade/tools/llamacpp_bench.py → lemonade_sdk-8.0.0/src/lemonade/tools/llamacpp/bench.py +1 -1
  18. lemonade_sdk-7.0.4/src/lemonade/tools/llamacpp.py → lemonade_sdk-8.0.0/src/lemonade/tools/llamacpp/load.py +18 -2
  19. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/mmlu.py +7 -15
  20. lemonade_sdk-7.0.4/src/lemonade/tools/ort_genai/oga.py → lemonade_sdk-8.0.0/src/lemonade/tools/oga/load.py +31 -422
  21. lemonade_sdk-8.0.0/src/lemonade/tools/oga/utils.py +423 -0
  22. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/perplexity.py +4 -3
  23. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/prompt.py +2 -1
  24. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/quark/quark_load.py +2 -1
  25. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/quark/quark_quantize.py +5 -5
  26. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/report/table.py +3 -3
  27. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/server/llamacpp.py +154 -29
  28. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/server/serve.py +169 -146
  29. lemonade_sdk-8.0.0/src/lemonade/tools/server/static/favicon.ico +0 -0
  30. lemonade_sdk-8.0.0/src/lemonade/tools/server/static/styles.css +881 -0
  31. lemonade_sdk-8.0.0/src/lemonade/tools/server/static/webapp.html +439 -0
  32. lemonade_sdk-8.0.0/src/lemonade/tools/server/tray.py +458 -0
  33. lemonade_sdk-7.0.4/src/lemonade/tools/server/port_utils.py → lemonade_sdk-8.0.0/src/lemonade/tools/server/utils/port.py +22 -3
  34. lemonade_sdk-8.0.0/src/lemonade/tools/server/utils/system_tray.py +395 -0
  35. lemonade_sdk-7.0.4/src/lemonade/tools/server/instructions.py → lemonade_sdk-8.0.0/src/lemonade/tools/server/webapp.py +4 -10
  36. lemonade_sdk-8.0.0/src/lemonade/version.py +1 -0
  37. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_install/install.py +46 -28
  38. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0/src/lemonade_sdk.egg-info}/PKG-INFO +84 -22
  39. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_sdk.egg-info/SOURCES.txt +17 -16
  40. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_sdk.egg-info/requires.txt +19 -13
  41. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_server/cli.py +182 -27
  42. lemonade_sdk-8.0.0/src/lemonade_server/model_manager.py +320 -0
  43. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_server/pydantic_models.py +9 -4
  44. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_server/server_models.json +5 -3
  45. lemonade_sdk-7.0.4/README.md +0 -39
  46. lemonade_sdk-7.0.4/src/lemonade/common/analyze_model.py +0 -26
  47. lemonade_sdk-7.0.4/src/lemonade/common/labels.py +0 -61
  48. lemonade_sdk-7.0.4/src/lemonade/common/onnx_helpers.py +0 -176
  49. lemonade_sdk-7.0.4/src/lemonade/common/plugins.py +0 -10
  50. lemonade_sdk-7.0.4/src/lemonade/common/tensor_helpers.py +0 -83
  51. lemonade_sdk-7.0.4/src/lemonade/tools/server/static/instructions.html +0 -262
  52. lemonade_sdk-7.0.4/src/lemonade/tools/server/static/styles.css +0 -313
  53. lemonade_sdk-7.0.4/src/lemonade/version.py +0 -1
  54. lemonade_sdk-7.0.4/src/lemonade_server/model_manager.py +0 -148
  55. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/LICENSE +0 -0
  56. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/NOTICE.md +0 -0
  57. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/pyproject.toml +0 -0
  58. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/setup.cfg +0 -0
  59. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/__init__.py +0 -0
  60. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/cache.py +0 -0
  61. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/__init__.py +0 -0
  62. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/cli_helpers.py +0 -0
  63. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/exceptions.py +0 -0
  64. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/filesystem.py +0 -0
  65. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/printing.py +0 -0
  66. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/common/test_helpers.py +0 -0
  67. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/profilers/__init__.py +0 -0
  68. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/profilers/profiler.py +0 -0
  69. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/sequence.py +0 -0
  70. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/state.py +0 -0
  71. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/__init__.py +0 -0
  72. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/bench.py +0 -0
  73. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/management_tools.py +0 -0
  74. {lemonade_sdk-7.0.4/src/lemonade/tools/ort_genai → lemonade_sdk-8.0.0/src/lemonade/tools/oga}/__init__.py +0 -0
  75. lemonade_sdk-7.0.4/src/lemonade/tools/ort_genai/oga_bench.py → lemonade_sdk-8.0.0/src/lemonade/tools/oga/bench.py +0 -0
  76. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/quark/__init__.py +0 -0
  77. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/report/__init__.py +0 -0
  78. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/report/llm_report.py +0 -0
  79. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/server/__init__.py +0 -0
  80. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/server/tool_calls.py +0 -0
  81. lemonade_sdk-7.0.4/src/lemonade/tools/server/thread_utils.py → lemonade_sdk-8.0.0/src/lemonade/tools/server/utils/thread.py +0 -0
  82. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade/tools/tool.py +0 -0
  83. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_install/__init__.py +0 -0
  84. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  85. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  86. {lemonade_sdk-7.0.4 → lemonade_sdk-8.0.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 7.0.4
3
+ Version: 8.0.0
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -9,36 +9,42 @@ License-File: LICENSE
9
9
  License-File: NOTICE.md
10
10
  Requires-Dist: invoke>=2.0.0
11
11
  Requires-Dist: onnx<1.18.0,>=1.11.0
12
- Requires-Dist: torch>=1.12.1
13
12
  Requires-Dist: pyyaml>=5.4
14
13
  Requires-Dist: typeguard>=2.3.13
15
14
  Requires-Dist: packaging>=20.9
16
15
  Requires-Dist: numpy<2.0.0
17
- Requires-Dist: pandas>=1.5.3
18
16
  Requires-Dist: fasteners
19
17
  Requires-Dist: GitPython>=3.1.40
20
18
  Requires-Dist: psutil>=6.1.1
21
19
  Requires-Dist: wmi
20
+ Requires-Dist: py-cpuinfo
22
21
  Requires-Dist: pytz
23
22
  Requires-Dist: zstandard
24
- Requires-Dist: matplotlib
23
+ Requires-Dist: fastapi
24
+ Requires-Dist: uvicorn[standard]
25
+ Requires-Dist: openai>=1.81.0
26
+ Requires-Dist: transformers<=4.51.3
27
+ Requires-Dist: jinja2
25
28
  Requires-Dist: tabulate
26
29
  Requires-Dist: huggingface-hub==0.30.2
30
+ Provides-Extra: oga-hybrid-minimal
31
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
32
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
33
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
34
+ Provides-Extra: oga-cpu-minimal
35
+ Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
36
+ Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
27
37
  Provides-Extra: llm
28
38
  Requires-Dist: torch>=2.6.0; extra == "llm"
29
- Requires-Dist: transformers<=4.51.3; extra == "llm"
30
39
  Requires-Dist: accelerate; extra == "llm"
31
- Requires-Dist: py-cpuinfo; extra == "llm"
32
40
  Requires-Dist: sentencepiece; extra == "llm"
33
41
  Requires-Dist: datasets; extra == "llm"
42
+ Requires-Dist: pandas>=1.5.3; extra == "llm"
43
+ Requires-Dist: matplotlib; extra == "llm"
34
44
  Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
35
- Requires-Dist: fastapi; extra == "llm"
36
- Requires-Dist: uvicorn[standard]; extra == "llm"
37
- Requires-Dist: openai>=1.81.0; extra == "llm"
38
45
  Requires-Dist: lm-eval[api]; extra == "llm"
39
46
  Provides-Extra: llm-oga-cpu
40
- Requires-Dist: onnxruntime-genai==0.6.0; extra == "llm-oga-cpu"
41
- Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "llm-oga-cpu"
47
+ Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
42
48
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
43
49
  Provides-Extra: llm-oga-igpu
44
50
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
@@ -57,9 +63,7 @@ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
57
63
  Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
58
64
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
59
65
  Provides-Extra: llm-oga-hybrid
60
- Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
61
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
62
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
66
+ Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
63
67
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
64
68
  Provides-Extra: llm-oga-unified
65
69
  Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
@@ -78,24 +82,82 @@ Dynamic: summary
78
82
 
79
83
  ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
80
84
 
81
- The [Lemonade SDK](./docs/README.md) is designed to make it easy to serve, benchmark, and deploy large language models (LLMs) on a variety of hardware platforms, including CPU, GPU, and NPU.
85
+ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
82
86
 
83
87
  <div align="center">
84
88
  <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
85
89
  </div>
86
90
 
91
+ ### Features
92
+
87
93
  The [Lemonade SDK](./docs/README.md) is comprised of the following:
88
94
 
89
- - 🌐 **Lemonade Server**: A server interface that uses the standard Open AI API, allowing applications to integrate with local LLMs.
90
- - 🐍 **Lemonade Python API**: Offers High-Level API for easy integration of Lemonade LLMs into Python applications and Low-Level API for custom experiments.
91
- - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs, frameworks (PyTorch, ONNX, GGUF), and measurement tools to run experiments. The available tools are:
92
- - Prompting an LLM.
93
- - Measuring the accuracy of an LLM using a variety of tests.
94
- - Benchmarking an LLM to get the time-to-first-token and tokens per second.
95
- - Profiling the memory usage of an LLM.
95
+ - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
96
+ - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
97
+ - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
98
+ - Prompting with templates.
99
+ - Measuring accuracy with a variety of tests.
100
+ - Benchmarking to get the time-to-first-token and tokens per second.
101
+ - Profiling the memory utilization.
96
102
 
97
103
  ### [Click here to get started with Lemonade.](./docs/README.md)
98
104
 
105
+ ### Supported Configurations
106
+
107
+ Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
108
+
109
+ <table border="1" cellpadding="6" cellspacing="0">
110
+ <thead>
111
+ <tr>
112
+ <th rowspan="2">Hardware</th>
113
+ <th colspan="3" align="center">🛠️ Engine Support</th>
114
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
115
+ </tr>
116
+ <tr>
117
+ <th align="center">OGA</th>
118
+ <th align="center">llamacpp</th>
119
+ <th align="center">HF</th>
120
+ <th align="center">Windows</th>
121
+ <th align="center">Linux</th>
122
+ </tr>
123
+ </thead>
124
+ <tbody>
125
+ <tr>
126
+ <td>🧠 CPU</td>
127
+ <td align="center">All platforms</td>
128
+ <td align="center">All platforms</td>
129
+ <td align="center">All platforms</td>
130
+ <td align="center">✅</td>
131
+ <td align="center">✅</td>
132
+ </tr>
133
+ <tr>
134
+ <td>🎮 GPU</td>
135
+ <td align="center">—</td>
136
+ <td align="center">Vulkan: All platforms<br><small>Focus: Radeon™ 7000/9000</small></td>
137
+ <td align="center">—</td>
138
+ <td align="center">✅</td>
139
+ <td align="center">✅</td>
140
+ </tr>
141
+ <tr>
142
+ <td>🤖 NPU</td>
143
+ <td align="center">AMD Ryzen™ AI 300 series</td>
144
+ <td align="center">—</td>
145
+ <td align="center">—</td>
146
+ <td align="center">✅</td>
147
+ <td align="center">—</td>
148
+ </tr>
149
+ </tbody>
150
+ </table>
151
+
152
+
153
+
154
+ #### Inference Engines Overview
155
+ | Engine | Description |
156
+ | :--- | :--- |
157
+ | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
158
+ | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
159
+ | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
160
+
99
161
  ## Contributing
100
162
 
101
163
  We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -0,0 +1,97 @@
1
+ [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
2
+ [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
3
+ [![Made with Python](https://img.shields.io/badge/Python-3.8,3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
4
+
5
+ ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
6
+
7
+ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models (LLMs) on your PC. Our focus is using the best tools, such as neural processing units (NPUs) and Vulkan GPU acceleration, to maximize LLM speed and responsiveness.
8
+
9
+ <div align="center">
10
+ <img src="https://download.amd.com/images/lemonade_640x480_1.gif" alt="Lemonade Demo" title="Lemonade in Action">
11
+ </div>
12
+
13
+ ### Features
14
+
15
+ The [Lemonade SDK](./docs/README.md) is comprised of the following:
16
+
17
+ - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
18
+ - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
19
+ - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
20
+ - Prompting with templates.
21
+ - Measuring accuracy with a variety of tests.
22
+ - Benchmarking to get the time-to-first-token and tokens per second.
23
+ - Profiling the memory utilization.
24
+
25
+ ### [Click here to get started with Lemonade.](./docs/README.md)
26
+
27
+ ### Supported Configurations
28
+
29
+ Maximum LLM performance requires the right hardware accelerator with the right inference engine for your scenario. Lemonade supports the following configurations, while also making it easy to switch between them at runtime.
30
+
31
+ <table border="1" cellpadding="6" cellspacing="0">
32
+ <thead>
33
+ <tr>
34
+ <th rowspan="2">Hardware</th>
35
+ <th colspan="3" align="center">🛠️ Engine Support</th>
36
+ <th colspan="2" align="center">🖥️ OS (x86/x64)</th>
37
+ </tr>
38
+ <tr>
39
+ <th align="center">OGA</th>
40
+ <th align="center">llamacpp</th>
41
+ <th align="center">HF</th>
42
+ <th align="center">Windows</th>
43
+ <th align="center">Linux</th>
44
+ </tr>
45
+ </thead>
46
+ <tbody>
47
+ <tr>
48
+ <td>🧠 CPU</td>
49
+ <td align="center">All platforms</td>
50
+ <td align="center">All platforms</td>
51
+ <td align="center">All platforms</td>
52
+ <td align="center">✅</td>
53
+ <td align="center">✅</td>
54
+ </tr>
55
+ <tr>
56
+ <td>🎮 GPU</td>
57
+ <td align="center">—</td>
58
+ <td align="center">Vulkan: All platforms<br><small>Focus: Radeon™ 7000/9000</small></td>
59
+ <td align="center">—</td>
60
+ <td align="center">✅</td>
61
+ <td align="center">✅</td>
62
+ </tr>
63
+ <tr>
64
+ <td>🤖 NPU</td>
65
+ <td align="center">AMD Ryzen™ AI 300 series</td>
66
+ <td align="center">—</td>
67
+ <td align="center">—</td>
68
+ <td align="center">✅</td>
69
+ <td align="center">—</td>
70
+ </tr>
71
+ </tbody>
72
+ </table>
73
+
74
+
75
+
76
+ #### Inference Engines Overview
77
+ | Engine | Description |
78
+ | :--- | :--- |
79
+ | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
80
+ | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
81
+ | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
82
+
83
+ ## Contributing
84
+
85
+ We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
86
+
87
+ ## Maintainers
88
+
89
+ This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues) or email [lemonade@amd.com](mailto:lemonade@amd.com).
90
+
91
+ ## License
92
+
93
+ This project is licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE). Portions of the project are licensed as described in [NOTICE.md](./NOTICE.md).
94
+
95
+ <!--This file was originally licensed under Apache 2.0. It has been modified.
96
+ Modifications Copyright (c) 2025 AMD-->
97
+
@@ -14,54 +14,73 @@ setup(
14
14
  "lemonade.profilers",
15
15
  "lemonade.common",
16
16
  "lemonade.tools",
17
- "lemonade.tools.ort_genai",
17
+ "lemonade.tools.huggingface",
18
+ "lemonade.tools.oga",
19
+ "lemonade.tools.llamacpp",
18
20
  "lemonade.tools.quark",
19
21
  "lemonade.tools.report",
22
+ "lemonade.tools.server.utils",
20
23
  "lemonade.tools.server",
21
24
  "lemonade_install",
22
25
  "lemonade_server",
23
26
  ],
24
27
  install_requires=[
28
+ # Minimal dependencies required for end-users who are running
29
+ # apps deployed on Lemonade SDK
25
30
  "invoke>=2.0.0",
26
31
  "onnx>=1.11.0,<1.18.0",
27
- "torch>=1.12.1",
28
32
  "pyyaml>=5.4",
29
33
  "typeguard>=2.3.13",
30
34
  "packaging>=20.9",
31
35
  # Necessary until upstream packages account for the breaking
32
36
  # change to numpy
33
37
  "numpy<2.0.0",
34
- "pandas>=1.5.3",
35
38
  "fasteners",
36
39
  "GitPython>=3.1.40",
37
40
  "psutil>=6.1.1",
38
41
  "wmi",
42
+ "py-cpuinfo",
39
43
  "pytz",
40
44
  "zstandard",
41
- "matplotlib",
45
+ "fastapi",
46
+ "uvicorn[standard]",
47
+ "openai>=1.81.0",
48
+ "transformers<=4.51.3",
49
+ "jinja2",
42
50
  "tabulate",
43
51
  # huggingface-hub==0.31.0 introduces a new transfer protocol that was causing us issues
44
52
  "huggingface-hub==0.30.2",
45
53
  ],
46
54
  extras_require={
55
+ # The -minimal extras are meant to deploy specific backends into end-user
56
+ # applications, without including developer-focused tools
57
+ "oga-hybrid-minimal": [
58
+ # Note: `lemonade-install --ryzenai hybrid` is necessary
59
+ # to complete installation
60
+ "onnx==1.16.1",
61
+ "numpy==1.26.4",
62
+ "protobuf>=6.30.1",
63
+ ],
64
+ "oga-cpu-minimal": [
65
+ "onnxruntime-genai==0.6.0",
66
+ "onnxruntime >=1.10.1,<1.22.0",
67
+ ],
47
68
  "llm": [
69
+ # Minimal dependencies for developers to use all features of
70
+ # Lemonade SDK, including building and optimizing models
48
71
  "torch>=2.6.0",
49
- "transformers<=4.51.3",
50
72
  "accelerate",
51
- "py-cpuinfo",
52
73
  "sentencepiece",
53
74
  "datasets",
75
+ "pandas>=1.5.3",
76
+ "matplotlib",
54
77
  # Install human-eval from a forked repo with Windows support until the
55
78
  # PR (https://github.com/openai/human-eval/pull/53) is merged
56
79
  "human-eval-windows==1.0.4",
57
- "fastapi",
58
- "uvicorn[standard]",
59
- "openai>=1.81.0",
60
80
  "lm-eval[api]",
61
81
  ],
62
82
  "llm-oga-cpu": [
63
- "onnxruntime-genai==0.6.0",
64
- "onnxruntime >=1.10.1,<1.22.0",
83
+ "lemonade-sdk[oga-cpu-minimal]",
65
84
  "lemonade-sdk[llm]",
66
85
  ],
67
86
  "llm-oga-igpu": [
@@ -84,9 +103,7 @@ setup(
84
103
  "lemonade-sdk[llm]",
85
104
  ],
86
105
  "llm-oga-hybrid": [
87
- "onnx==1.16.1",
88
- "numpy==1.26.4",
89
- "protobuf>=6.30.1",
106
+ "lemonade-sdk[oga-hybrid-minimal]",
90
107
  "lemonade-sdk[llm]",
91
108
  ],
92
109
  "llm-oga-unified": [
@@ -57,7 +57,7 @@ def from_pretrained(
57
57
  # Huggingface supports all checkpoints, so there is nothing to check for
58
58
 
59
59
  import torch
60
- from lemonade.tools.huggingface_load import HuggingfaceLoad
60
+ from lemonade.tools.huggingface.load import HuggingfaceLoad
61
61
 
62
62
  state = _make_state(recipe, checkpoint)
63
63
 
@@ -73,7 +73,7 @@ def from_pretrained(
73
73
  # Huggingface Transformers recipe for discrete GPU (Nvidia, Instinct, Radeon)
74
74
 
75
75
  import torch
76
- from lemonade.tools.huggingface_load import HuggingfaceLoad
76
+ from lemonade.tools.huggingface.load import HuggingfaceLoad
77
77
 
78
78
  state = _make_state(recipe, checkpoint)
79
79
 
@@ -87,7 +87,7 @@ def from_pretrained(
87
87
  return state.model, state.tokenizer
88
88
 
89
89
  elif recipe.startswith("oga-"):
90
- import lemonade.tools.ort_genai.oga as oga
90
+ import lemonade.tools.oga.load as oga
91
91
 
92
92
  # Make sure the user chose a supported runtime, e.g., oga-cpu
93
93
  user_backend = recipe.split("oga-")[1]
@@ -1,4 +1,8 @@
1
1
  import os
2
+
3
+ # pylint: disable=C0413
4
+ # Prevent HF warnings from showing on every import
5
+ os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
2
6
  from lemonade.version import __version__ as version_number
3
7
  from lemonade.tools import FirstTool, NiceHelpFormatter
4
8
  from lemonade.profilers.memory_tracker import MemoryTracker
@@ -8,12 +12,12 @@ from lemonade.sequence import Sequence
8
12
  from lemonade.tools.management_tools import Cache, Version, SystemInfo
9
13
  from lemonade.state import State
10
14
 
11
- from lemonade.tools.huggingface_load import HuggingfaceLoad
12
-
13
- from lemonade.tools.huggingface_bench import HuggingfaceBench
14
- from lemonade.tools.ort_genai.oga_bench import OgaBench
15
- from lemonade.tools.llamacpp_bench import LlamaCppBench
16
- from lemonade.tools.llamacpp import LoadLlamaCpp
15
+ from lemonade.tools.huggingface.load import HuggingfaceLoad
16
+ from lemonade.tools.huggingface.bench import HuggingfaceBench
17
+ from lemonade.tools.oga.load import OgaLoad
18
+ from lemonade.tools.oga.bench import OgaBench
19
+ from lemonade.tools.llamacpp.bench import LlamaCppBench
20
+ from lemonade.tools.llamacpp.load import LoadLlamaCpp
17
21
 
18
22
  import lemonade.cache as cache
19
23
  from lemonade.tools.mmlu import AccuracyMMLU
@@ -24,7 +28,6 @@ from lemonade.tools.prompt import LLMPrompt
24
28
  from lemonade.tools.quark.quark_load import QuarkLoad
25
29
  from lemonade.tools.quark.quark_quantize import QuarkQuantize
26
30
  from lemonade.tools.report.llm_report import LemonadeReport
27
- from lemonade.tools.server.serve import Server
28
31
 
29
32
 
30
33
  def main():
@@ -40,26 +43,17 @@ def main():
40
43
  LMEvalHarness,
41
44
  LLMPrompt,
42
45
  HuggingfaceBench,
46
+ OgaLoad,
43
47
  OgaBench,
44
48
  QuarkQuantize,
45
49
  QuarkLoad,
46
50
  LemonadeReport,
47
- Server,
48
51
  # Inherited from lemonade
49
52
  Cache,
50
53
  Version,
51
54
  SystemInfo,
52
55
  ]
53
56
 
54
- # Import onnxruntime-genai recipes
55
- try:
56
- from lemonade.tools.ort_genai.oga import OgaLoad
57
-
58
- tools = tools + [OgaLoad]
59
-
60
- except ModuleNotFoundError:
61
- pass
62
-
63
57
  # List the available profilers
64
58
  profilers = [MemoryTracker]
65
59
 
@@ -6,8 +6,6 @@ from typing import Dict
6
6
  import hashlib
7
7
  import psutil
8
8
  import yaml
9
- import torch
10
- import numpy as np
11
9
  import lemonade.common.exceptions as exp
12
10
 
13
11
  state_file_name = "state.yaml"
@@ -101,51 +99,6 @@ def unique_id():
101
99
  return hashlib.sha256(f"{pid}{start_time}".encode()).hexdigest()
102
100
 
103
101
 
104
- def get_shapes_and_dtypes(inputs: dict):
105
- """
106
- Return the shape and data type of each value in the inputs dict
107
- """
108
- shapes = {}
109
- dtypes = {}
110
- for key in sorted(inputs):
111
- value = inputs[key]
112
- if isinstance(
113
- value,
114
- (list, tuple),
115
- ):
116
- for v, i in zip(value, range(len(value))):
117
- if isinstance(v, (list, tuple)):
118
- # Handle nested lists/tuples, for example past_key_values
119
- # in an LLM that has KV-caching enabled
120
- for v2, i2 in zip(v, range(len(v))):
121
- subsubkey = f"{key}[{i}][{i2}]"
122
- shapes[subsubkey] = np.array(v2).shape
123
- dtypes[subsubkey] = np.array(v2).dtype.name
124
- else:
125
- # Handle single list/tuple
126
- subkey = f"{key}[{i}]"
127
- shapes[subkey] = np.array(v).shape
128
- dtypes[subkey] = np.array(v).dtype.name
129
- elif torch.is_tensor(value):
130
- shapes[key] = np.array(value.detach()).shape
131
- dtypes[key] = np.array(value.detach()).dtype.name
132
- elif isinstance(value, np.ndarray):
133
- shapes[key] = value.shape
134
- dtypes[key] = value.dtype.name
135
- elif isinstance(value, (bool, int, float)):
136
- shapes[key] = (1,)
137
- dtypes[key] = type(value).__name__
138
- elif value is None:
139
- pass
140
- else:
141
- raise exp.Error(
142
- "One of the provided inputs contains the unsupported "
143
- f' type {type(value)} at key "{key}".'
144
- )
145
-
146
- return shapes, dtypes
147
-
148
-
149
102
  class Logger:
150
103
  """
151
104
  Redirects stdout to file (and console if needed)
@@ -0,0 +1,50 @@
1
+ import os
2
+ from typing import Optional
3
+ import socket
4
+ from huggingface_hub import model_info
5
+
6
+
7
+ def is_offline():
8
+ """
9
+ Check if the system is offline by attempting to connect to huggingface.co.
10
+
11
+ Returns:
12
+ bool: True if the system is offline (cannot connect to huggingface.co),
13
+ False otherwise.
14
+ """
15
+ if os.environ.get("LEMONADE_OFFLINE"):
16
+ return True
17
+ try:
18
+ socket.gethostbyname("huggingface.co")
19
+ return False
20
+ except socket.gaierror:
21
+ return True
22
+
23
+
24
+ def get_base_model(checkpoint: str) -> Optional[str]:
25
+ """
26
+ Get the base model information for a given checkpoint from the Hugging Face Hub.
27
+ Will auto-detect if we're offline and skip the network call in that case.
28
+
29
+ Args:
30
+ checkpoint: The model checkpoint to query
31
+
32
+ Returns:
33
+ The base model name if found, or None if not found or error occurs
34
+ """
35
+ # Skip network call in offline mode
36
+ if is_offline():
37
+ return None
38
+
39
+ try:
40
+ info = model_info(checkpoint)
41
+ if info.cardData and "base_model" in info.cardData:
42
+ if info.cardData["base_model"] is not None:
43
+ # This is a derived model
44
+ return info.cardData["base_model"]
45
+ else:
46
+ # This is itself a base model
47
+ return [checkpoint]
48
+ except Exception: # pylint: disable=broad-except
49
+ pass
50
+ return None
@@ -7,12 +7,10 @@ import dataclasses
7
7
  from typing import Callable, List, Union, Dict, Optional
8
8
  import textwrap
9
9
  import psutil
10
- import torch
11
10
  from lemonade.common import printing
12
11
  from lemonade.state import State
13
12
  import lemonade.common.build as build
14
13
  import lemonade.common.filesystem as fs
15
- import lemonade.common.analyze_model as analyze_model
16
14
 
17
15
 
18
16
  def _pretty_print_key(key: str) -> str:
@@ -64,7 +62,6 @@ class SkipFields:
64
62
 
65
63
  file_name: bool = False
66
64
  model_name: bool = False
67
- parameters: bool = False
68
65
  location: bool = False
69
66
  input_shape: bool = False
70
67
  build_dir: bool = False
@@ -147,18 +144,6 @@ class UniqueInvocationInfo(BasicInfo):
147
144
  print(f", line {self.line}")
148
145
  self.skip.location = True
149
146
 
150
- def _print_parameters(self):
151
- if self.skip.parameters or self.params is None:
152
- return
153
-
154
- # Display number of parameters and size
155
- parameters_size = parameters_to_size(self.params)
156
- print(
157
- f"{self.indent}\tParameters:\t{'{:,}'.format(self.params)} ({parameters_size})"
158
- )
159
-
160
- self.skip.parameters = True
161
-
162
147
  def _print_unique_input_shape(
163
148
  self,
164
149
  exec_time_formatted: str,
@@ -348,7 +333,6 @@ class UniqueInvocationInfo(BasicInfo):
348
333
  if (self.depth == 0 and not model_visited) or (self.depth != 0):
349
334
  # Print this information only once per model
350
335
  self._print_location()
351
- self._print_parameters()
352
336
  self._print_unique_input_shape(
353
337
  exec_time_formatted, invocation_idx, multiple_unique_invocations
354
338
  )
@@ -362,16 +346,13 @@ class UniqueInvocationInfo(BasicInfo):
362
346
 
363
347
  @dataclasses.dataclass
364
348
  class ModelInfo(BasicInfo):
365
- model: torch.nn.Module = None
349
+ model: str = None
366
350
  old_forward: Union[Callable, None] = None
367
351
  unique_invocations: Union[Dict[str, UniqueInvocationInfo], None] = (
368
352
  dataclasses.field(default_factory=dict)
369
353
  )
370
354
  last_unique_invocation_executed: Union[str, None] = None
371
355
 
372
- def __post_init__(self):
373
- self.params = analyze_model.count_parameters(self.model)
374
-
375
356
 
376
357
  def recursive_print(
377
358
  models_found: Dict[str, ModelInfo],
@@ -447,7 +428,7 @@ def stop_logger_forward() -> None:
447
428
  def add_to_state(
448
429
  state: State,
449
430
  name: str,
450
- model: Union[str, torch.nn.Module],
431
+ model: str,
451
432
  extension: str = "",
452
433
  input_shapes: Optional[Dict] = None,
453
434
  ):
@@ -3,6 +3,7 @@ import importlib.metadata
3
3
  import platform
4
4
  import re
5
5
  import subprocess
6
+ import ctypes
6
7
 
7
8
 
8
9
  class SystemInfo(ABC):
@@ -184,11 +185,25 @@ class WindowsSystemInfo(SystemInfo):
184
185
  str: Windows power setting.
185
186
  """
186
187
  try:
187
- out = subprocess.check_output(["powercfg", "/getactivescheme"]).decode()
188
- return re.search(r"\((.*?)\)", out).group(1)
188
+ # Capture output as bytes
189
+ out_bytes = subprocess.check_output(["powercfg", "/getactivescheme"])
190
+
191
+ # Get system's OEM code page (e.g., cp437, cp850)
192
+ oem_cp = "cp" + str(ctypes.windll.kernel32.GetOEMCP())
193
+
194
+ # Decode using detected OEM code page
195
+ out = out_bytes.decode(oem_cp)
196
+
197
+ # Extract power scheme name from parentheses
198
+ match = re.search(r"\((.*?)\)", out)
199
+ if match:
200
+ return match.group(1)
201
+ return "Power scheme name not found in output"
202
+
189
203
  except subprocess.CalledProcessError:
190
- pass
191
- return "Windows power setting not found"
204
+ return "Windows power setting not found (command failed)"
205
+ except Exception as e: # pylint: disable=broad-except
206
+ return f"Error retrieving power setting: {str(e)}"
192
207
 
193
208
  def get_dict(self) -> dict:
194
209
  """