lemonade-sdk 8.0.6__tar.gz → 8.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.0.6/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.0}/PKG-INFO +30 -19
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/README.md +6 -2
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/setup.py +25 -18
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/network.py +18 -1
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/bench.py +3 -1
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/utils.py +7 -7
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/load.py +239 -112
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/utils.py +19 -7
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/serve.py +19 -28
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/styles.css +5 -6
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/webapp.html +3 -0
- lemonade_sdk-8.1.0/src/lemonade/version.py +1 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_install/install.py +65 -84
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0/src/lemonade_sdk.egg-info}/PKG-INFO +30 -19
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/requires.txt +17 -7
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/cli.py +1 -1
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/model_manager.py +4 -3
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/pydantic_models.py +1 -4
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/server_models.json +35 -11
- lemonade_sdk-8.0.6/src/lemonade/version.py +0 -1
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/LICENSE +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/NOTICE.md +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/setup.cfg +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/api.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/cache.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/inference_engines.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/status.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/system_info.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/bench.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/load.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/management_tools.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/bench.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/quark_load.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/quark_quantize.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/table.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/llamacpp.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/tray.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/port.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/system_tray.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/thread.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/webapp.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
- {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.6
|
|
3
|
+
Version: 8.1.0
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -22,16 +22,15 @@ Requires-Dist: pytz
|
|
|
22
22
|
Requires-Dist: zstandard
|
|
23
23
|
Requires-Dist: fastapi
|
|
24
24
|
Requires-Dist: uvicorn[standard]
|
|
25
|
-
Requires-Dist: openai
|
|
25
|
+
Requires-Dist: openai<1.97.1,>=1.81.0
|
|
26
26
|
Requires-Dist: transformers<=4.53.2
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub==0.33.0
|
|
31
|
-
Provides-Extra: oga-hybrid
|
|
32
|
-
Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
|
|
33
|
-
Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
|
|
34
|
-
Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
|
|
31
|
+
Provides-Extra: oga-ryzenai
|
|
32
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
|
|
33
|
+
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
35
34
|
Provides-Extra: oga-cpu
|
|
36
35
|
Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
|
|
37
36
|
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
|
|
|
43
42
|
Requires-Dist: matplotlib; extra == "dev"
|
|
44
43
|
Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
|
|
45
44
|
Requires-Dist: lm-eval[api]; extra == "dev"
|
|
45
|
+
Provides-Extra: oga-hybrid
|
|
46
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
|
|
47
|
+
Provides-Extra: oga-unified
|
|
48
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
|
|
46
49
|
Provides-Extra: oga-hybrid-minimal
|
|
47
|
-
Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
|
|
50
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
|
|
48
51
|
Provides-Extra: oga-cpu-minimal
|
|
49
52
|
Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
|
|
53
|
+
Provides-Extra: oga-npu-minimal
|
|
54
|
+
Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
|
|
50
55
|
Provides-Extra: llm
|
|
51
56
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm"
|
|
52
57
|
Provides-Extra: llm-oga-cpu
|
|
53
58
|
Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
|
|
59
|
+
Provides-Extra: llm-oga-npu
|
|
60
|
+
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
61
|
+
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
62
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
63
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
64
|
+
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
65
|
+
Provides-Extra: llm-oga-hybrid
|
|
66
|
+
Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
|
|
67
|
+
Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
|
|
68
|
+
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
|
|
69
|
+
Provides-Extra: llm-oga-unified
|
|
70
|
+
Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
54
71
|
Provides-Extra: llm-oga-igpu
|
|
55
72
|
Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
|
|
56
73
|
Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
|
|
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
|
|
|
61
78
|
Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
|
|
62
79
|
Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
|
|
63
80
|
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
|
|
64
|
-
Provides-Extra: llm-oga-npu
|
|
65
|
-
Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
|
|
66
|
-
Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
|
|
67
|
-
Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
|
|
68
|
-
Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
|
|
69
|
-
Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
|
|
70
|
-
Provides-Extra: llm-oga-hybrid
|
|
71
|
-
Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
|
|
72
|
-
Provides-Extra: llm-oga-unified
|
|
73
|
-
Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
|
|
74
81
|
Dynamic: author-email
|
|
75
82
|
Dynamic: description
|
|
76
83
|
Dynamic: description-content-type
|
|
@@ -174,7 +181,7 @@ lemonade-server list
|
|
|
174
181
|
|
|
175
182
|
## Model Library
|
|
176
183
|
|
|
177
|
-
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/
|
|
184
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
|
|
178
185
|
|
|
179
186
|
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
180
187
|
<p align="center">
|
|
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
|
|
|
263
270
|
print(completion.choices[0].message.content)
|
|
264
271
|
```
|
|
265
272
|
|
|
266
|
-
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
273
|
+
For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
|
|
267
274
|
|
|
268
275
|
## Beyond an LLM Server
|
|
269
276
|
|
|
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
|
|
|
272
279
|
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
273
280
|
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
274
281
|
|
|
282
|
+
## FAQ
|
|
283
|
+
|
|
284
|
+
To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
|
|
285
|
+
|
|
275
286
|
## Contributing
|
|
276
287
|
|
|
277
288
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -92,7 +92,7 @@ lemonade-server list
|
|
|
92
92
|
|
|
93
93
|
## Model Library
|
|
94
94
|
|
|
95
|
-
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/
|
|
95
|
+
Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
|
|
96
96
|
|
|
97
97
|
You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
|
|
98
98
|
<p align="center">
|
|
@@ -181,7 +181,7 @@ completion = client.chat.completions.create(
|
|
|
181
181
|
print(completion.choices[0].message.content)
|
|
182
182
|
```
|
|
183
183
|
|
|
184
|
-
For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
|
|
184
|
+
For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
|
|
185
185
|
|
|
186
186
|
## Beyond an LLM Server
|
|
187
187
|
|
|
@@ -190,6 +190,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
|
|
|
190
190
|
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
191
191
|
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
|
|
192
192
|
|
|
193
|
+
## FAQ
|
|
194
|
+
|
|
195
|
+
To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
|
|
196
|
+
|
|
193
197
|
## Contributing
|
|
194
198
|
|
|
195
199
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
@@ -44,7 +44,7 @@ setup(
|
|
|
44
44
|
"zstandard",
|
|
45
45
|
"fastapi",
|
|
46
46
|
"uvicorn[standard]",
|
|
47
|
-
"openai>=1.81.0",
|
|
47
|
+
"openai>=1.81.0,<1.97.1",
|
|
48
48
|
"transformers<=4.53.2",
|
|
49
49
|
"jinja2",
|
|
50
50
|
"tabulate",
|
|
@@ -54,11 +54,9 @@ setup(
|
|
|
54
54
|
extras_require={
|
|
55
55
|
# The non-dev extras are meant to deploy specific backends into end-user
|
|
56
56
|
# applications, without including developer-focused tools
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
"onnx==1.16.1",
|
|
61
|
-
"numpy==1.26.4",
|
|
57
|
+
# Primary NPU extra using unified PyPI package
|
|
58
|
+
"oga-ryzenai": [
|
|
59
|
+
"onnxruntime-genai-directml-ryzenai==0.7.0.2",
|
|
62
60
|
"protobuf>=6.30.1",
|
|
63
61
|
],
|
|
64
62
|
"oga-cpu": [
|
|
@@ -81,11 +79,31 @@ setup(
|
|
|
81
79
|
"lm-eval[api]",
|
|
82
80
|
],
|
|
83
81
|
# Keep backwards compatibility for old extras names
|
|
84
|
-
"oga-hybrid
|
|
82
|
+
"oga-hybrid": ["lemonade-sdk[oga-ryzenai]"],
|
|
83
|
+
"oga-unified": ["lemonade-sdk[oga-ryzenai]"],
|
|
84
|
+
"oga-hybrid-minimal": ["lemonade-sdk[oga-ryzenai]"],
|
|
85
85
|
"oga-cpu-minimal": ["lemonade-sdk[oga-cpu]"],
|
|
86
|
+
"oga-npu-minimal": ["lemonade-sdk[oga-ryzenai]"],
|
|
86
87
|
"llm": ["lemonade-sdk[dev]"],
|
|
87
88
|
"llm-oga-cpu": ["lemonade-sdk[dev,oga-cpu]"],
|
|
88
89
|
# The following extras are deprecated and/or not commonly used
|
|
90
|
+
"llm-oga-npu": [
|
|
91
|
+
"onnx==1.16.0",
|
|
92
|
+
# NPU requires specific onnxruntime version for Ryzen AI compatibility
|
|
93
|
+
# This may conflict with other OGA extras that require >=1.22.0
|
|
94
|
+
"onnxruntime==1.18.0",
|
|
95
|
+
"numpy==1.26.4",
|
|
96
|
+
"protobuf>=6.30.1",
|
|
97
|
+
"lemonade-sdk[dev]",
|
|
98
|
+
],
|
|
99
|
+
"llm-oga-hybrid": [
|
|
100
|
+
# Note: `lemonade-install --ryzenai hybrid` is necessary
|
|
101
|
+
# to complete installation for RAI 1.4.0.
|
|
102
|
+
"onnx==1.16.1",
|
|
103
|
+
"numpy==1.26.4",
|
|
104
|
+
"protobuf>=6.30.1",
|
|
105
|
+
],
|
|
106
|
+
"llm-oga-unified": ["lemonade-sdk[dev, llm-oga-hybrid]"],
|
|
89
107
|
"llm-oga-igpu": [
|
|
90
108
|
"onnxruntime-genai-directml==0.6.0",
|
|
91
109
|
"onnxruntime-directml>=1.19.0,<1.22.0",
|
|
@@ -98,17 +116,6 @@ setup(
|
|
|
98
116
|
"transformers<=4.51.3",
|
|
99
117
|
"lemonade-sdk[dev]",
|
|
100
118
|
],
|
|
101
|
-
"llm-oga-npu": [
|
|
102
|
-
"onnx==1.16.0",
|
|
103
|
-
# NPU requires specific onnxruntime version for Ryzen AI compatibility
|
|
104
|
-
# This may conflict with other OGA extras that require >=1.22.0
|
|
105
|
-
"onnxruntime==1.18.0",
|
|
106
|
-
"numpy==1.26.4",
|
|
107
|
-
"protobuf>=6.30.1",
|
|
108
|
-
"lemonade-sdk[dev]",
|
|
109
|
-
],
|
|
110
|
-
"llm-oga-hybrid": ["lemonade-sdk[dev,oga-hybrid]"],
|
|
111
|
-
"llm-oga-unified": ["lemonade-sdk[llm-oga-hybrid]"],
|
|
112
119
|
},
|
|
113
120
|
classifiers=[],
|
|
114
121
|
entry_points={
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Optional
|
|
3
3
|
import socket
|
|
4
|
-
from huggingface_hub import model_info
|
|
4
|
+
from huggingface_hub import model_info, snapshot_download
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def is_offline():
|
|
@@ -48,3 +48,20 @@ def get_base_model(checkpoint: str) -> Optional[str]:
|
|
|
48
48
|
except Exception: # pylint: disable=broad-except
|
|
49
49
|
pass
|
|
50
50
|
return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def custom_snapshot_download(repo_id, **kwargs):
|
|
54
|
+
"""
|
|
55
|
+
Custom snapshot download with retry logic for Windows symlink privilege errors.
|
|
56
|
+
"""
|
|
57
|
+
for attempt in range(2):
|
|
58
|
+
try:
|
|
59
|
+
return snapshot_download(repo_id=repo_id, **kwargs)
|
|
60
|
+
except OSError as e:
|
|
61
|
+
if (
|
|
62
|
+
hasattr(e, "winerror")
|
|
63
|
+
and e.winerror == 1314 # pylint: disable=no-member
|
|
64
|
+
and attempt < 1
|
|
65
|
+
):
|
|
66
|
+
continue
|
|
67
|
+
raise
|
|
@@ -68,7 +68,9 @@ class LlamaCppBench(Bench):
|
|
|
68
68
|
# and error handling
|
|
69
69
|
model.time_to_first_token = None
|
|
70
70
|
model.tokens_per_second = None
|
|
71
|
-
raw_output, stderr = model.generate(
|
|
71
|
+
raw_output, stderr = model.generate(
|
|
72
|
+
prompt, max_new_tokens=output_tokens, return_raw=True
|
|
73
|
+
)
|
|
72
74
|
|
|
73
75
|
if model.time_to_first_token is None or model.tokens_per_second is None:
|
|
74
76
|
error_msg = (
|
|
@@ -215,10 +215,10 @@ def get_local_checkpoint_path(base_checkpoint, variant):
|
|
|
215
215
|
full_model_path = None
|
|
216
216
|
model_to_use = None
|
|
217
217
|
try:
|
|
218
|
-
from
|
|
218
|
+
from lemonade.common.network import custom_snapshot_download
|
|
219
219
|
|
|
220
|
-
snapshot_path =
|
|
221
|
-
|
|
220
|
+
snapshot_path = custom_snapshot_download(
|
|
221
|
+
base_checkpoint,
|
|
222
222
|
local_files_only=True,
|
|
223
223
|
)
|
|
224
224
|
|
|
@@ -405,10 +405,10 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
|
|
|
405
405
|
core_files, sharded_files = identify_gguf_models(checkpoint, variant, config_mmproj)
|
|
406
406
|
|
|
407
407
|
# Download the files
|
|
408
|
-
from
|
|
408
|
+
from lemonade.common.network import custom_snapshot_download
|
|
409
409
|
|
|
410
|
-
snapshot_folder =
|
|
411
|
-
|
|
410
|
+
snapshot_folder = custom_snapshot_download(
|
|
411
|
+
checkpoint,
|
|
412
412
|
allow_patterns=list(core_files.values()) + sharded_files,
|
|
413
413
|
)
|
|
414
414
|
|
|
@@ -573,7 +573,7 @@ class LlamaCppAdapter(ModelAdapter):
|
|
|
573
573
|
#
|
|
574
574
|
if "llama_perf_context_print: eval time =" in line:
|
|
575
575
|
parts = line.split("=")[1].split()
|
|
576
|
-
self.response_tokens = int(parts[3])
|
|
576
|
+
self.response_tokens = int(parts[3]) + 1 # include first token
|
|
577
577
|
response_time_ms = float(parts[0])
|
|
578
578
|
self.tokens_per_second = (
|
|
579
579
|
1000 * self.response_tokens / response_time_ms
|