lemonade-sdk 8.1.10__tar.gz → 8.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.1.10/src/lemonade_sdk.egg-info → lemonade_sdk-8.1.12}/PKG-INFO +10 -6
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/README.md +8 -5
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/setup.py +3 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/cache.py +6 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/status.py +4 -4
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/bench.py +22 -1
- lemonade_sdk-8.1.12/src/lemonade/tools/flm/__init__.py +1 -0
- lemonade_sdk-8.1.12/src/lemonade/tools/flm/utils.py +255 -0
- lemonade_sdk-8.1.12/src/lemonade/tools/llamacpp/bench.py +224 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/llamacpp/load.py +20 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/llamacpp/utils.py +210 -17
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/oga/bench.py +0 -26
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/report/table.py +6 -0
- lemonade_sdk-8.1.12/src/lemonade/tools/server/flm.py +133 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/llamacpp.py +23 -5
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/serve.py +260 -135
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/js/chat.js +165 -82
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/js/models.js +87 -54
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/js/shared.js +9 -6
- lemonade_sdk-8.1.12/src/lemonade/tools/server/static/logs.html +57 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/styles.css +159 -8
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/webapp.html +28 -10
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/tray.py +94 -38
- lemonade_sdk-8.1.12/src/lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade_sdk-8.1.10/src/lemonade/tools/server/utils/system_tray.py → lemonade_sdk-8.1.12/src/lemonade/tools/server/utils/windows_tray.py +13 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/webapp.py +4 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/wrapped_server.py +91 -25
- lemonade_sdk-8.1.12/src/lemonade/version.py +1 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info}/PKG-INFO +10 -6
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_sdk.egg-info/SOURCES.txt +6 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_sdk.egg-info/requires.txt +3 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_server/cli.py +103 -14
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_server/model_manager.py +186 -45
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_server/pydantic_models.py +25 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_server/server_models.json +175 -62
- lemonade_sdk-8.1.10/src/lemonade/tools/llamacpp/bench.py +0 -136
- lemonade_sdk-8.1.10/src/lemonade/version.py +0 -1
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/LICENSE +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/NOTICE.md +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/pyproject.toml +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/setup.cfg +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/api.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/inference_engines.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/network.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/system_info.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/profilers/agt_power.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/profilers/hwinfo_power.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/management_tools.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/oga/load.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/oga/utils.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/static/js/model-settings.js +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/utils/port.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/server/utils/thread.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
- {lemonade_sdk-8.1.10 → lemonade_sdk-8.1.12}/src/lemonade_server/settings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.10
|
|
3
|
+
Version: 8.1.12
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.14
|
|
@@ -29,6 +29,7 @@ Requires-Dist: tabulate
|
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
31
|
Requires-Dist: python-dotenv
|
|
32
|
+
Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
|
|
32
33
|
Provides-Extra: oga-ryzenai
|
|
33
34
|
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
|
|
34
35
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
@@ -65,6 +66,8 @@ Dynamic: summary
|
|
|
65
66
|
<img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" /></a>
|
|
66
67
|
<a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
|
|
67
68
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" /></a>
|
|
69
|
+
<a href="https://lemonade-server.ai/" title="macOS 14+ with Apple Silicon">
|
|
70
|
+
<img src="https://img.shields.io/badge/macOS-14%2B-000000?logo=apple&logoColor=white" alt="macOS 14+" /></a>
|
|
68
71
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
69
72
|
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" /></a>
|
|
70
73
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
@@ -152,11 +155,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
|
|
|
152
155
|
|
|
153
156
|
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
|
|
154
157
|
|
|
155
|
-
| Hardware | Engine: OGA | Engine: llamacpp | Engine:
|
|
156
|
-
|
|
157
|
-
| **🧠 CPU** | All platforms | All platforms |
|
|
158
|
-
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms
|
|
159
|
-
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — |
|
|
158
|
+
| Hardware | Engine: OGA | Engine: llamacpp | Engine: FLM | Windows | Linux | macOS |
|
|
159
|
+
|----------|-------------|------------------|------------|---------|-------|-------|
|
|
160
|
+
| **🧠 CPU** | All platforms | All platforms | - | ✅ | ✅ | ✅ |
|
|
161
|
+
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms*<br>Metal: Apple Silicon | — | ✅ | ✅ | ✅ |
|
|
162
|
+
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — | Ryzen™ AI 300 series | ✅ | — | — |
|
|
160
163
|
|
|
161
164
|
<details>
|
|
162
165
|
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
@@ -261,6 +264,7 @@ This project is:
|
|
|
261
264
|
- [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
|
|
262
265
|
- [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
|
|
263
266
|
- [OpenAI API](https://github.com/openai/openai-python)
|
|
267
|
+
- [IRON/MLIR-AIE](https://github.com/Xilinx/mlir-aie)
|
|
264
268
|
- and more...
|
|
265
269
|
- Accelerated by mentorship from the OCV Catalyst program.
|
|
266
270
|
- Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
<img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" /></a>
|
|
10
10
|
<a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
|
|
11
11
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" /></a>
|
|
12
|
+
<a href="https://lemonade-server.ai/" title="macOS 14+ with Apple Silicon">
|
|
13
|
+
<img src="https://img.shields.io/badge/macOS-14%2B-000000?logo=apple&logoColor=white" alt="macOS 14+" /></a>
|
|
12
14
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
13
15
|
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" /></a>
|
|
14
16
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
@@ -96,11 +98,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
|
|
|
96
98
|
|
|
97
99
|
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
|
|
98
100
|
|
|
99
|
-
| Hardware | Engine: OGA | Engine: llamacpp | Engine:
|
|
100
|
-
|
|
101
|
-
| **🧠 CPU** | All platforms | All platforms |
|
|
102
|
-
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms
|
|
103
|
-
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — |
|
|
101
|
+
| Hardware | Engine: OGA | Engine: llamacpp | Engine: FLM | Windows | Linux | macOS |
|
|
102
|
+
|----------|-------------|------------------|------------|---------|-------|-------|
|
|
103
|
+
| **🧠 CPU** | All platforms | All platforms | - | ✅ | ✅ | ✅ |
|
|
104
|
+
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms*<br>Metal: Apple Silicon | — | ✅ | ✅ | ✅ |
|
|
105
|
+
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — | Ryzen™ AI 300 series | ✅ | — | — |
|
|
104
106
|
|
|
105
107
|
<details>
|
|
106
108
|
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
@@ -205,6 +207,7 @@ This project is:
|
|
|
205
207
|
- [OnnxRuntime GenAI](https://github.com/microsoft/onnxruntime-genai)
|
|
206
208
|
- [Hugging Face Hub](https://github.com/huggingface/huggingface_hub)
|
|
207
209
|
- [OpenAI API](https://github.com/openai/openai-python)
|
|
210
|
+
- [IRON/MLIR-AIE](https://github.com/Xilinx/mlir-aie)
|
|
208
211
|
- and more...
|
|
209
212
|
- Accelerated by mentorship from the OCV Catalyst program.
|
|
210
213
|
- Licensed under the [Apache 2.0 License](https://github.com/lemonade-sdk/lemonade/blob/main/LICENSE).
|
|
@@ -17,6 +17,7 @@ setup(
|
|
|
17
17
|
"lemonade.tools.huggingface",
|
|
18
18
|
"lemonade.tools.oga",
|
|
19
19
|
"lemonade.tools.llamacpp",
|
|
20
|
+
"lemonade.tools.flm",
|
|
20
21
|
"lemonade.tools.report",
|
|
21
22
|
"lemonade.tools.server.utils",
|
|
22
23
|
"lemonade.tools.server",
|
|
@@ -48,6 +49,8 @@ setup(
|
|
|
48
49
|
"sentencepiece",
|
|
49
50
|
"huggingface-hub[hf_xet]==0.33.0",
|
|
50
51
|
"python-dotenv",
|
|
52
|
+
# macOS-specific dependencies
|
|
53
|
+
"rumps>=0.4.0; sys_platform == 'darwin'",
|
|
51
54
|
],
|
|
52
55
|
extras_require={
|
|
53
56
|
# The non-dev extras are meant to deploy specific backends into end-user
|
|
@@ -43,7 +43,11 @@ def build_name(input_name):
|
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
45
|
if os.path.isdir(input_name):
|
|
46
|
+
# Input is a folder so no good way to determine a model name
|
|
46
47
|
input_name_sanitized = "local_model"
|
|
48
|
+
elif os.path.isfile(input_name):
|
|
49
|
+
# Use the filename without its extension
|
|
50
|
+
input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
|
|
47
51
|
else:
|
|
48
52
|
# Sanitize the input name
|
|
49
53
|
input_name_sanitized = input_name.replace("/", "_")
|
|
@@ -63,8 +67,9 @@ class Keys:
|
|
|
63
67
|
TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
|
|
64
68
|
STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
|
|
65
69
|
SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
|
|
66
|
-
PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
|
|
67
70
|
STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
|
|
71
|
+
PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
|
|
72
|
+
STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
|
|
68
73
|
CHECKPOINT = "checkpoint"
|
|
69
74
|
DTYPE = "dtype"
|
|
70
75
|
PROMPT = "prompt"
|
|
@@ -112,10 +112,10 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
112
112
|
if print_file_name:
|
|
113
113
|
print(f"{self.script_name}{self.extension}:")
|
|
114
114
|
|
|
115
|
-
# Print invocation about the model (only applies to scripts, not ONNX files
|
|
115
|
+
# Print invocation about the model (only applies to scripts, not ONNX or GGUF files, nor
|
|
116
116
|
# LLMs, which have no extension)
|
|
117
117
|
if not (
|
|
118
|
-
self.extension
|
|
118
|
+
self.extension in [".onnx", ".gguf"]
|
|
119
119
|
or self.extension == build.state_file_name
|
|
120
120
|
or self.extension == ""
|
|
121
121
|
):
|
|
@@ -138,7 +138,7 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
138
138
|
|
|
139
139
|
if self.depth == 0:
|
|
140
140
|
print(f"{self.indent}\tLocation:\t{self.file}", end="")
|
|
141
|
-
if self.extension
|
|
141
|
+
if self.extension in [".onnx", ".gguf"]:
|
|
142
142
|
print()
|
|
143
143
|
else:
|
|
144
144
|
print(f", line {self.line}")
|
|
@@ -314,7 +314,7 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
314
314
|
Print information about a given model or submodel.
|
|
315
315
|
"""
|
|
316
316
|
|
|
317
|
-
if self.extension
|
|
317
|
+
if self.extension in [".onnx", ".gguf"] or self.extension == "":
|
|
318
318
|
self.indent = "\t" * (2 * self.depth)
|
|
319
319
|
else:
|
|
320
320
|
self.indent = "\t" * (2 * self.depth + 1)
|
|
@@ -29,7 +29,9 @@ class Bench(Tool, ABC):
|
|
|
29
29
|
Keys.SECONDS_TO_FIRST_TOKEN,
|
|
30
30
|
Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
|
|
31
31
|
Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
|
|
32
|
+
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
32
33
|
Keys.PREFILL_TOKENS_PER_SECOND,
|
|
34
|
+
Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
|
|
33
35
|
Keys.PROMPT_TOKENS,
|
|
34
36
|
Keys.RESPONSE_TOKENS,
|
|
35
37
|
Keys.MAX_MEMORY_USED_GBYTE,
|
|
@@ -42,7 +44,9 @@ class Bench(Tool, ABC):
|
|
|
42
44
|
self.mean_time_to_first_token_list = []
|
|
43
45
|
self.std_dev_time_to_first_token_list = []
|
|
44
46
|
self.prefill_tokens_per_second_list = []
|
|
47
|
+
self.std_dev_prefill_tokens_per_second_list = []
|
|
45
48
|
self.token_generation_tokens_per_second_list = []
|
|
49
|
+
self.std_dev_token_generation_tokens_per_second_list = []
|
|
46
50
|
self.max_memory_used_gb_list = []
|
|
47
51
|
|
|
48
52
|
# Max memory used can only be measured on Windows systems
|
|
@@ -88,7 +92,7 @@ class Bench(Tool, ABC):
|
|
|
88
92
|
default=[str(default_prompt_length)],
|
|
89
93
|
metavar="PROMPT",
|
|
90
94
|
help="Input one or more prompts to the LLM. Three formats are supported. "
|
|
91
|
-
"1) integer: use a synthetic prompt with the specified length "
|
|
95
|
+
"1) integer: use a synthetic prompt with the specified token length "
|
|
92
96
|
"2) str: use a user-provided prompt string "
|
|
93
97
|
"3) path/to/prompt.txt: load the prompt from a text file. "
|
|
94
98
|
f"(default: {default_prompt_length}) ",
|
|
@@ -246,10 +250,27 @@ class Bench(Tool, ABC):
|
|
|
246
250
|
Keys.PREFILL_TOKENS_PER_SECOND,
|
|
247
251
|
self.get_item_or_list(self.prefill_tokens_per_second_list),
|
|
248
252
|
)
|
|
253
|
+
if not all(
|
|
254
|
+
element is None for element in self.std_dev_prefill_tokens_per_second_list
|
|
255
|
+
):
|
|
256
|
+
state.save_stat(
|
|
257
|
+
Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
|
|
258
|
+
self.get_item_or_list(self.std_dev_prefill_tokens_per_second_list),
|
|
259
|
+
)
|
|
249
260
|
state.save_stat(
|
|
250
261
|
Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
|
|
251
262
|
self.get_item_or_list(self.token_generation_tokens_per_second_list),
|
|
252
263
|
)
|
|
264
|
+
if not all(
|
|
265
|
+
element is None
|
|
266
|
+
for element in self.std_dev_token_generation_tokens_per_second_list
|
|
267
|
+
):
|
|
268
|
+
state.save_stat(
|
|
269
|
+
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
270
|
+
self.get_item_or_list(
|
|
271
|
+
self.std_dev_token_generation_tokens_per_second_list
|
|
272
|
+
),
|
|
273
|
+
)
|
|
253
274
|
if self.save_max_memory_used:
|
|
254
275
|
state.save_stat(
|
|
255
276
|
Keys.MAX_MEMORY_USED_GBYTE,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# FLM (FastFlowLM) utilities for Lemonade SDK
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FLM (FastFlowLM) utilities for installation, version checking, and model management.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import logging
|
|
7
|
+
import subprocess
|
|
8
|
+
import tempfile
|
|
9
|
+
import time
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
from packaging.version import Version
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
FLM_MINIMUM_VERSION = "0.9.12"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def check_flm_version() -> Optional[str]:
    """
    Check if FLM is installed and return its version, or None if not available.

    Runs ``flm version`` and parses output of the form ``FLM v0.9.4``.

    Returns:
        The version string without the ``FLM v`` prefix (e.g. ``"0.9.4"``), or
        None when the ``flm`` executable cannot be found, exits with a non-zero
        status, or prints output that does not consist of ``FLM v`` followed by
        a version token.
    """
    try:
        result = subprocess.run(
            ["flm", "version"],
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",
            errors="replace",
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None

    prefix = "FLM v"
    output = result.stdout.strip()
    if not output.startswith(prefix):
        return None

    # Keep only the first whitespace-delimited token after the prefix, so that
    # trailing text or additional output lines never end up in the returned
    # version string. An empty remainder is reported as "not available".
    tokens = output[len(prefix) :].split()
    return tokens[0] if tokens else None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _prepend_to_path(entries):
    """
    Prepend each directory in `entries` to PATH unless PATH already lists it.

    Entries are compared as whole PATH components (case-normalized, ignoring
    surrounding whitespace and trailing slashes), not as substrings.
    """

    def _key(entry):
        return os.path.normcase(entry.strip().rstrip("\\/"))

    current = os.environ.get("PATH", "")
    known = {_key(item) for item in current.split(";") if item.strip()}

    fresh = []
    for entry in entries:
        entry = entry.strip()
        if entry and _key(entry) not in known:
            known.add(_key(entry))
            fresh.append(entry)

    if fresh:
        os.environ["PATH"] = ";".join(fresh + ([current] if current else []))


def refresh_environment():
    """
    Refresh PATH to pick up newly installed executables.

    Only acts on Windows (``os.name == "nt"``); on other platforms this is a
    no-op. New entries from the machine-wide PATH stored in the registry and
    from a few common FLM installation directories are prepended to this
    process's PATH. Entries already present are not added again, so calling
    this function repeatedly does not grow PATH.
    """
    if os.name != "nt":
        return

    # winreg only exists on Windows, so it is imported after the platform check
    import winreg

    try:
        with winreg.OpenKey(
            winreg.HKEY_LOCAL_MACHINE,
            r"SYSTEM\CurrentControlSet\Control\Session Manager\Environment",
        ) as key:
            path_value, _ = winreg.QueryValueEx(key, "PATH")
        # The stored value may contain %VAR% placeholders; expand them so the
        # resulting entries are usable directories
        expanded = winreg.ExpandEnvironmentStrings(path_value)
        _prepend_to_path(expanded.split(";"))
    except Exception as e:  # pylint: disable=broad-except
        logging.debug("Could not refresh PATH from registry: %s", e)

    # Also try to add common installation paths
    common_paths = [
        r"C:\Program Files\FLM",
        r"C:\Program Files (x86)\FLM",
        os.path.expanduser(r"~\AppData\Local\FLM"),
    ]
    for path in common_paths:
        if os.path.exists(path):
            _prepend_to_path([path])
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def install_flm():
    """
    Check if FLM is installed and at minimum version.
    If not, download and run the GUI installer, then wait for completion.

    Returns immediately when `check_flm_version()` already reports a version
    that is at least `FLM_MINIMUM_VERSION`. Otherwise the installer is
    downloaded into the temporary directory, launched, and waited on; the
    `flm` command is then re-checked up to 10 times, refreshing PATH between
    attempts. The downloaded installer file is removed afterwards, whether
    or not the installation succeeded.

    Raises:
        RuntimeError: if the download fails, the installer exits with a
            non-zero code, any other step fails, or `flm` is still not
            available at the minimum version after the installer finished.
    """
    # Check current FLM installation
    current_version = check_flm_version()

    if current_version and Version(current_version) >= Version(FLM_MINIMUM_VERSION):
        logging.info(
            "FLM v%s is already installed and meets minimum version requirement (v%s)",
            current_version,
            FLM_MINIMUM_VERSION,
        )
        return

    if current_version:
        logging.info(
            "FLM v%s is installed but below minimum version v%s. Upgrading...",
            current_version,
            FLM_MINIMUM_VERSION,
        )
    else:
        logging.info(
            "FLM not found. Installing FLM v%s or later...", FLM_MINIMUM_VERSION
        )

    # Download the installer
    # pylint: disable=line-too-long
    installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
    installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")

    try:
        # Remove existing installer if present
        if os.path.exists(installer_path):
            os.remove(installer_path)

        logging.info("Downloading FLM installer...")
        # The context manager closes the streamed response on every exit path
        with requests.get(installer_url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Save installer to disk
            with open(installer_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)
                f.flush()
                os.fsync(f.fileno())

        logging.info("Downloaded FLM installer to %s", installer_path)

        # Launch the installer GUI
        logging.warning(
            "Launching FLM installer GUI. Please complete the installation..."
        )

        # Launch installer and wait for it to complete
        # NOTE(review): the installer is fetched from the "latest" release and
        # executed without an integrity check; shell=True is kept as in the
        # original Windows launch path.
        if os.name == "nt":  # Windows
            process = subprocess.Popen([installer_path], shell=True)
        else:
            process = subprocess.Popen([installer_path])

        # Wait for installer to complete
        process.wait()

        if process.returncode != 0:
            raise RuntimeError(
                f"FLM installer failed with exit code {process.returncode}"
            )

        logging.info("FLM installer completed successfully")

        # Refresh environment to pick up new PATH entries
        refresh_environment()

        # Wait a moment for system to update
        time.sleep(2)

        # Verify installation
        max_retries = 10
        for attempt in range(max_retries):
            new_version = check_flm_version()
            if new_version and Version(new_version) >= Version(FLM_MINIMUM_VERSION):
                logging.info("FLM v%s successfully installed and verified", new_version)
                return

            if attempt < max_retries - 1:
                logging.debug(
                    "FLM not yet available in PATH, retrying... (attempt %d/%d)",
                    attempt + 1,
                    max_retries,
                )
                time.sleep(3)
                refresh_environment()

        # Final check failed
        raise RuntimeError(
            "FLM installation completed but 'flm' command is not available in PATH. "
            "Please ensure FLM is properly installed and available in your system PATH."
        )

    except requests.RequestException as e:
        raise RuntimeError(f"Failed to download FLM installer: {e}") from e
    except RuntimeError:
        # Already carries a specific message; do not wrap it a second time
        raise
    except Exception as e:
        raise RuntimeError(f"FLM installation failed: {e}") from e
    finally:
        # Clean up installer file
        if os.path.exists(installer_path):
            try:
                os.remove(installer_path)
            except OSError:
                pass  # Ignore cleanup errors
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def download_flm_model(config_checkpoint, _=None, do_not_upgrade=False) -> None:
    """
    Downloads the FLM model for the given configuration by running `flm pull`.

    Args:
        config_checkpoint: name of the FLM model to install.
        _: placeholder for `config_mmproj`, which is standard
            for WrappedServer (see llamacpp/utils.py).
        do_not_upgrade: when True, run a plain `flm pull`; when False
            (the default), pass `--force` to `flm pull`.

    Raises:
        subprocess.CalledProcessError: if `flm pull` exits with a non-zero code.
        FileNotFoundError: if the `flm` executable cannot be found.
    """
    command = ["flm", "pull", f"{config_checkpoint}"]
    if not do_not_upgrade:
        command.append("--force")

    subprocess.run(command, check=True)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def get_flm_installed_models() -> List[str]:
    """
    Run `flm list` and collect the checkpoints that are marked as installed.

    A line counts as an installed model when, after trimming, it starts with
    "- " and ends with " ✅"; the text in between is the checkpoint name.

    Returns:
        List of installed FLM model checkpoints (e.g., ["llama3.2:1b", "gemma3:4b"]).
        Empty when `flm` is missing, exits with an error, or prints nothing.
    """
    try:
        listing = subprocess.run(
            ["flm", "list"],
            capture_output=True,
            text=True,
            check=True,
            encoding="utf-8",
            errors="replace",
        ).stdout

        # Nothing printed means nothing to parse
        if not listing:
            return []

        found = []
        for raw_line in listing.strip().split("\n"):
            entry = raw_line.strip()
            if not entry.startswith("- "):
                continue

            # Drop the bullet, then keep only entries carrying the installed marker
            entry = entry[2:].strip()
            if entry.endswith(" ✅"):
                found.append(entry[:-2].strip())

        return found

    except (subprocess.CalledProcessError, FileNotFoundError, AttributeError):
        # FLM not installed, not available, or output parsing failed
        return []
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def is_flm_available() -> bool:
    """
    Check if FLM is available and meets minimum version requirements.

    Returns:
        True when `check_flm_version()` reports a version that is at least
        `FLM_MINIMUM_VERSION`. False when no version is reported, or when the
        reported string cannot be parsed as a version.
    """
    # Local import: only this function needs the exception type
    from packaging.version import InvalidVersion

    current_version = check_flm_version()
    if current_version is None:
        return False

    try:
        return Version(current_version) >= Version(FLM_MINIMUM_VERSION)
    except InvalidVersion:
        # An unparseable version cannot satisfy the minimum requirement
        logging.debug("Could not parse FLM version: %r", current_version)
        return False
|