lemonade-sdk 8.0.6__tar.gz → 8.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (78)
  1. {lemonade_sdk-8.0.6/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.0}/PKG-INFO +30 -19
  2. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/README.md +6 -2
  3. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/setup.py +25 -18
  4. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/network.py +18 -1
  5. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/bench.py +3 -1
  6. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/utils.py +7 -7
  7. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/load.py +239 -112
  8. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/utils.py +19 -7
  9. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/serve.py +19 -28
  10. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/styles.css +5 -6
  11. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/webapp.html +3 -0
  12. lemonade_sdk-8.1.0/src/lemonade/version.py +1 -0
  13. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_install/install.py +65 -84
  14. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0/src/lemonade_sdk.egg-info}/PKG-INFO +30 -19
  15. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/requires.txt +17 -7
  16. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/cli.py +1 -1
  17. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/model_manager.py +4 -3
  18. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/pydantic_models.py +1 -4
  19. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_server/server_models.json +35 -11
  20. lemonade_sdk-8.0.6/src/lemonade/version.py +0 -1
  21. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/LICENSE +0 -0
  22. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/NOTICE.md +0 -0
  23. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/setup.cfg +0 -0
  24. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/__init__.py +0 -0
  25. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/api.py +0 -0
  26. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/cache.py +0 -0
  27. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/cli.py +0 -0
  28. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/__init__.py +0 -0
  29. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/build.py +0 -0
  30. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/cli_helpers.py +0 -0
  31. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/exceptions.py +0 -0
  32. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/filesystem.py +0 -0
  33. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/inference_engines.py +0 -0
  34. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/printing.py +0 -0
  35. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/status.py +0 -0
  36. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/system_info.py +0 -0
  37. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/common/test_helpers.py +0 -0
  38. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/__init__.py +0 -0
  39. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/memory_tracker.py +0 -0
  40. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/profilers/profiler.py +0 -0
  41. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/sequence.py +0 -0
  42. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/state.py +0 -0
  43. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/__init__.py +0 -0
  44. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/accuracy.py +0 -0
  45. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/adapter.py +0 -0
  46. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/bench.py +0 -0
  47. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/bench.py +0 -0
  48. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/load.py +0 -0
  49. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/huggingface/utils.py +0 -0
  50. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/humaneval.py +0 -0
  51. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/llamacpp/load.py +0 -0
  52. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/management_tools.py +0 -0
  53. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/mmlu.py +0 -0
  54. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/__init__.py +0 -0
  55. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/oga/bench.py +0 -0
  56. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/perplexity.py +0 -0
  57. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/prompt.py +0 -0
  58. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/__init__.py +0 -0
  59. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/quark_load.py +0 -0
  60. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  61. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/__init__.py +0 -0
  62. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/llm_report.py +0 -0
  63. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/report/table.py +0 -0
  64. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/__init__.py +0 -0
  65. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/llamacpp.py +0 -0
  66. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/static/favicon.ico +0 -0
  67. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/tool_calls.py +0 -0
  68. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/tray.py +0 -0
  69. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/port.py +0 -0
  70. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  71. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/utils/thread.py +0 -0
  72. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/server/webapp.py +0 -0
  73. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade/tools/tool.py +0 -0
  74. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_install/__init__.py +0 -0
  75. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
  76. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  77. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  78. {lemonade_sdk-8.0.6 → lemonade_sdk-8.1.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.0.6
3
+ Version: 8.1.0
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.13
@@ -22,16 +22,15 @@ Requires-Dist: pytz
22
22
  Requires-Dist: zstandard
23
23
  Requires-Dist: fastapi
24
24
  Requires-Dist: uvicorn[standard]
25
- Requires-Dist: openai>=1.81.0
25
+ Requires-Dist: openai<1.97.1,>=1.81.0
26
26
  Requires-Dist: transformers<=4.53.2
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
29
  Requires-Dist: sentencepiece
30
30
  Requires-Dist: huggingface-hub==0.33.0
31
- Provides-Extra: oga-hybrid
32
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
31
+ Provides-Extra: oga-ryzenai
32
+ Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
33
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
35
34
  Provides-Extra: oga-cpu
36
35
  Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
36
  Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
43
42
  Requires-Dist: matplotlib; extra == "dev"
44
43
  Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
44
  Requires-Dist: lm-eval[api]; extra == "dev"
45
+ Provides-Extra: oga-hybrid
46
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
47
+ Provides-Extra: oga-unified
48
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
46
49
  Provides-Extra: oga-hybrid-minimal
47
- Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
50
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
48
51
  Provides-Extra: oga-cpu-minimal
49
52
  Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
53
+ Provides-Extra: oga-npu-minimal
54
+ Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
50
55
  Provides-Extra: llm
51
56
  Requires-Dist: lemonade-sdk[dev]; extra == "llm"
52
57
  Provides-Extra: llm-oga-cpu
53
58
  Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
59
+ Provides-Extra: llm-oga-npu
60
+ Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
61
+ Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
62
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
63
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
64
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
65
+ Provides-Extra: llm-oga-hybrid
66
+ Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
67
+ Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
68
+ Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
69
+ Provides-Extra: llm-oga-unified
70
+ Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
54
71
  Provides-Extra: llm-oga-igpu
55
72
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
56
73
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
78
  Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
79
  Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
80
  Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
64
- Provides-Extra: llm-oga-npu
65
- Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
66
- Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
67
- Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
68
- Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
69
- Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
70
- Provides-Extra: llm-oga-hybrid
71
- Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
72
- Provides-Extra: llm-oga-unified
73
- Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
74
81
  Dynamic: author-email
75
82
  Dynamic: description
76
83
  Dynamic: description-content-type
@@ -174,7 +181,7 @@ lemonade-server list
174
181
 
175
182
  ## Model Library
176
183
 
177
- Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
184
+ Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
178
185
 
179
186
  You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
180
187
  <p align="center">
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
263
270
  print(completion.choices[0].message.content)
264
271
  ```
265
272
 
266
- For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
273
+ For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
267
274
 
268
275
  ## Beyond an LLM Server
269
276
 
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
272
279
  - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
273
280
  - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
274
281
 
282
+ ## FAQ
283
+
284
+ To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
285
+
275
286
  ## Contributing
276
287
 
277
288
  We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -92,7 +92,7 @@ lemonade-server list
92
92
 
93
93
  ## Model Library
94
94
 
95
- Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
95
+ Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
96
96
 
97
97
  You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
98
98
  <p align="center">
@@ -181,7 +181,7 @@ completion = client.chat.completions.create(
181
181
  print(completion.choices[0].message.content)
182
182
  ```
183
183
 
184
- For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
184
+ For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
185
185
 
186
186
  ## Beyond an LLM Server
187
187
 
@@ -190,6 +190,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
190
190
  - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
191
191
  - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
192
192
 
193
+ ## FAQ
194
+
195
+ To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
196
+
193
197
  ## Contributing
194
198
 
195
199
  We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
@@ -44,7 +44,7 @@ setup(
44
44
  "zstandard",
45
45
  "fastapi",
46
46
  "uvicorn[standard]",
47
- "openai>=1.81.0",
47
+ "openai>=1.81.0,<1.97.1",
48
48
  "transformers<=4.53.2",
49
49
  "jinja2",
50
50
  "tabulate",
@@ -54,11 +54,9 @@ setup(
54
54
  extras_require={
55
55
  # The non-dev extras are meant to deploy specific backends into end-user
56
56
  # applications, without including developer-focused tools
57
- "oga-hybrid": [
58
- # Note: `lemonade-install --ryzenai hybrid` is necessary
59
- # to complete installation
60
- "onnx==1.16.1",
61
- "numpy==1.26.4",
57
+ # Primary NPU extra using unified PyPI package
58
+ "oga-ryzenai": [
59
+ "onnxruntime-genai-directml-ryzenai==0.7.0.2",
62
60
  "protobuf>=6.30.1",
63
61
  ],
64
62
  "oga-cpu": [
@@ -81,11 +79,31 @@ setup(
81
79
  "lm-eval[api]",
82
80
  ],
83
81
  # Keep backwards compatibility for old extras names
84
- "oga-hybrid-minimal": ["lemonade-sdk[oga-hybrid]"],
82
+ "oga-hybrid": ["lemonade-sdk[oga-ryzenai]"],
83
+ "oga-unified": ["lemonade-sdk[oga-ryzenai]"],
84
+ "oga-hybrid-minimal": ["lemonade-sdk[oga-ryzenai]"],
85
85
  "oga-cpu-minimal": ["lemonade-sdk[oga-cpu]"],
86
+ "oga-npu-minimal": ["lemonade-sdk[oga-ryzenai]"],
86
87
  "llm": ["lemonade-sdk[dev]"],
87
88
  "llm-oga-cpu": ["lemonade-sdk[dev,oga-cpu]"],
88
89
  # The following extras are deprecated and/or not commonly used
90
+ "llm-oga-npu": [
91
+ "onnx==1.16.0",
92
+ # NPU requires specific onnxruntime version for Ryzen AI compatibility
93
+ # This may conflict with other OGA extras that require >=1.22.0
94
+ "onnxruntime==1.18.0",
95
+ "numpy==1.26.4",
96
+ "protobuf>=6.30.1",
97
+ "lemonade-sdk[dev]",
98
+ ],
99
+ "llm-oga-hybrid": [
100
+ # Note: `lemonade-install --ryzenai hybrid` is necessary
101
+ # to complete installation for RAI 1.4.0.
102
+ "onnx==1.16.1",
103
+ "numpy==1.26.4",
104
+ "protobuf>=6.30.1",
105
+ ],
106
+ "llm-oga-unified": ["lemonade-sdk[dev, llm-oga-hybrid]"],
89
107
  "llm-oga-igpu": [
90
108
  "onnxruntime-genai-directml==0.6.0",
91
109
  "onnxruntime-directml>=1.19.0,<1.22.0",
@@ -98,17 +116,6 @@ setup(
98
116
  "transformers<=4.51.3",
99
117
  "lemonade-sdk[dev]",
100
118
  ],
101
- "llm-oga-npu": [
102
- "onnx==1.16.0",
103
- # NPU requires specific onnxruntime version for Ryzen AI compatibility
104
- # This may conflict with other OGA extras that require >=1.22.0
105
- "onnxruntime==1.18.0",
106
- "numpy==1.26.4",
107
- "protobuf>=6.30.1",
108
- "lemonade-sdk[dev]",
109
- ],
110
- "llm-oga-hybrid": ["lemonade-sdk[dev,oga-hybrid]"],
111
- "llm-oga-unified": ["lemonade-sdk[llm-oga-hybrid]"],
112
119
  },
113
120
  classifiers=[],
114
121
  entry_points={
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  from typing import Optional
3
3
  import socket
4
- from huggingface_hub import model_info
4
+ from huggingface_hub import model_info, snapshot_download
5
5
 
6
6
 
7
7
  def is_offline():
@@ -48,3 +48,20 @@ def get_base_model(checkpoint: str) -> Optional[str]:
48
48
  except Exception: # pylint: disable=broad-except
49
49
  pass
50
50
  return None
51
+
52
+
53
+ def custom_snapshot_download(repo_id, **kwargs):
54
+ """
55
+ Custom snapshot download with retry logic for Windows symlink privilege errors.
56
+ """
57
+ for attempt in range(2):
58
+ try:
59
+ return snapshot_download(repo_id=repo_id, **kwargs)
60
+ except OSError as e:
61
+ if (
62
+ hasattr(e, "winerror")
63
+ and e.winerror == 1314 # pylint: disable=no-member
64
+ and attempt < 1
65
+ ):
66
+ continue
67
+ raise
@@ -68,7 +68,9 @@ class LlamaCppBench(Bench):
68
68
  # and error handling
69
69
  model.time_to_first_token = None
70
70
  model.tokens_per_second = None
71
- raw_output, stderr = model.generate(prompt, return_raw=True)
71
+ raw_output, stderr = model.generate(
72
+ prompt, max_new_tokens=output_tokens, return_raw=True
73
+ )
72
74
 
73
75
  if model.time_to_first_token is None or model.tokens_per_second is None:
74
76
  error_msg = (
@@ -215,10 +215,10 @@ def get_local_checkpoint_path(base_checkpoint, variant):
215
215
  full_model_path = None
216
216
  model_to_use = None
217
217
  try:
218
- from huggingface_hub import snapshot_download
218
+ from lemonade.common.network import custom_snapshot_download
219
219
 
220
- snapshot_path = snapshot_download(
221
- repo_id=base_checkpoint,
220
+ snapshot_path = custom_snapshot_download(
221
+ base_checkpoint,
222
222
  local_files_only=True,
223
223
  )
224
224
 
@@ -405,10 +405,10 @@ def download_gguf(config_checkpoint, config_mmproj=None) -> dict:
405
405
  core_files, sharded_files = identify_gguf_models(checkpoint, variant, config_mmproj)
406
406
 
407
407
  # Download the files
408
- from huggingface_hub import snapshot_download
408
+ from lemonade.common.network import custom_snapshot_download
409
409
 
410
- snapshot_folder = snapshot_download(
411
- repo_id=checkpoint,
410
+ snapshot_folder = custom_snapshot_download(
411
+ checkpoint,
412
412
  allow_patterns=list(core_files.values()) + sharded_files,
413
413
  )
414
414
 
@@ -573,7 +573,7 @@ class LlamaCppAdapter(ModelAdapter):
573
573
  #
574
574
  if "llama_perf_context_print: eval time =" in line:
575
575
  parts = line.split("=")[1].split()
576
- self.response_tokens = int(parts[3])
576
+ self.response_tokens = int(parts[3]) + 1 # include first token
577
577
  response_time_ms = float(parts[0])
578
578
  self.tokens_per_second = (
579
579
  1000 * self.response_tokens / response_time_ms