lemonade-sdk 8.1.12__tar.gz → 8.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- {lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info → lemonade_sdk-8.2.0}/PKG-INFO +4 -3
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.py +3 -2
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/system_info.py +0 -26
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/utils.py +70 -22
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/load.py +10 -1
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/utils.py +82 -8
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/load.py +38 -142
- lemonade_sdk-8.2.0/src/lemonade/tools/oga/migration.py +403 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/llamacpp.py +20 -1
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/serve.py +334 -16
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/models.js +416 -18
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/shared.js +41 -4
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/styles.css +204 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/webapp.html +32 -0
- lemonade_sdk-8.2.0/src/lemonade/version.py +1 -0
- lemonade_sdk-8.2.0/src/lemonade_install/install.py +239 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0/src/lemonade_sdk.egg-info}/PKG-INFO +4 -3
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/SOURCES.txt +1 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/requires.txt +3 -2
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/cli.py +10 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/model_manager.py +172 -11
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/server_models.json +94 -71
- lemonade_sdk-8.1.12/src/lemonade/version.py +0 -1
- lemonade_sdk-8.1.12/src/lemonade_install/install.py +0 -785
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/LICENSE +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/NOTICE.md +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/README.md +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/pyproject.toml +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.cfg +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/api.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cache.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cli.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/build.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/cli_helpers.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/exceptions.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/filesystem.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/inference_engines.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/network.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/printing.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/status.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/test_helpers.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/agt_power.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/hwinfo_power.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/memory_tracker.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/profiler.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/sequence.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/state.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/accuracy.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/adapter.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/bench.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/bench.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/load.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/utils.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/humaneval.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/bench.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/management_tools.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/mmlu.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/bench.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/utils.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/perplexity.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/prompt.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/llm_report.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/table.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/flm.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/favicon.ico +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/chat.js +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/model-settings.js +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/logs.html +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tool_calls.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tray.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/macos_tray.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/port.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/thread.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/windows_tray.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/webapp.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/wrapped_server.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/tool.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_install/__init__.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/pydantic_models.py +0 -0
- {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/settings.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.12
|
|
3
|
+
Version: 8.2.0
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.14
|
|
@@ -29,12 +29,13 @@ Requires-Dist: tabulate
|
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
31
|
Requires-Dist: python-dotenv
|
|
32
|
+
Requires-Dist: python-multipart
|
|
32
33
|
Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
|
|
33
34
|
Provides-Extra: oga-ryzenai
|
|
34
|
-
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.
|
|
35
|
+
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.9.2; extra == "oga-ryzenai"
|
|
35
36
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
36
37
|
Provides-Extra: oga-cpu
|
|
37
|
-
Requires-Dist: onnxruntime-genai==0.
|
|
38
|
+
Requires-Dist: onnxruntime-genai==0.9.2; extra == "oga-cpu"
|
|
38
39
|
Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
|
|
39
40
|
Provides-Extra: dev
|
|
40
41
|
Requires-Dist: torch>=2.6.0; extra == "dev"
|
|
@@ -49,6 +49,7 @@ setup(
|
|
|
49
49
|
"sentencepiece",
|
|
50
50
|
"huggingface-hub[hf_xet]==0.33.0",
|
|
51
51
|
"python-dotenv",
|
|
52
|
+
"python-multipart",
|
|
52
53
|
# macOS-specific dependencies
|
|
53
54
|
"rumps>=0.4.0; sys_platform == 'darwin'",
|
|
54
55
|
],
|
|
@@ -57,11 +58,11 @@ setup(
|
|
|
57
58
|
# applications, without including developer-focused tools
|
|
58
59
|
# Primary NPU extra using unified PyPI package
|
|
59
60
|
"oga-ryzenai": [
|
|
60
|
-
"onnxruntime-genai-directml-ryzenai==0.
|
|
61
|
+
"onnxruntime-genai-directml-ryzenai==0.9.2",
|
|
61
62
|
"protobuf>=6.30.1",
|
|
62
63
|
],
|
|
63
64
|
"oga-cpu": [
|
|
64
|
-
"onnxruntime-genai==0.
|
|
65
|
+
"onnxruntime-genai==0.9.2",
|
|
65
66
|
"onnxruntime >=1.22.0",
|
|
66
67
|
],
|
|
67
68
|
# Developer-focused tools for benchmarking, accuracy testing, and
|
|
@@ -1110,32 +1110,6 @@ class LinuxSystemInfo(SystemInfo):
|
|
|
1110
1110
|
|
|
1111
1111
|
return ""
|
|
1112
1112
|
|
|
1113
|
-
def _get_nvidia_vram_smi_linux(self) -> float:
|
|
1114
|
-
"""
|
|
1115
|
-
Get NVIDIA GPU VRAM on Linux using nvidia-smi command.
|
|
1116
|
-
|
|
1117
|
-
Returns:
|
|
1118
|
-
float: VRAM in GB, or 0.0 if detection fails
|
|
1119
|
-
"""
|
|
1120
|
-
try:
|
|
1121
|
-
output = (
|
|
1122
|
-
subprocess.check_output(
|
|
1123
|
-
"nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
|
|
1124
|
-
shell=True,
|
|
1125
|
-
stderr=subprocess.DEVNULL,
|
|
1126
|
-
)
|
|
1127
|
-
.decode()
|
|
1128
|
-
.strip()
|
|
1129
|
-
)
|
|
1130
|
-
|
|
1131
|
-
# nvidia-smi returns memory in MB
|
|
1132
|
-
vram_mb = int(output.split("\n")[0])
|
|
1133
|
-
vram_gb = round(vram_mb / 1024, 1)
|
|
1134
|
-
return vram_gb
|
|
1135
|
-
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
|
1136
|
-
pass
|
|
1137
|
-
return 0.0
|
|
1138
|
-
|
|
1139
1113
|
@staticmethod
|
|
1140
1114
|
def get_processor_name() -> str:
|
|
1141
1115
|
"""
|
|
@@ -10,16 +10,46 @@ import time
|
|
|
10
10
|
from typing import List, Optional
|
|
11
11
|
|
|
12
12
|
import requests
|
|
13
|
-
from packaging.version import Version
|
|
13
|
+
from packaging.version import Version, InvalidVersion
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
def get_flm_latest_version() -> Optional[str]:
|
|
17
|
+
"""
|
|
18
|
+
Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags"
|
|
19
|
+
This uses the GitHub tags API.
|
|
20
|
+
"""
|
|
21
|
+
url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
|
|
22
|
+
try:
|
|
23
|
+
response = requests.get(url, timeout=10)
|
|
24
|
+
response.raise_for_status()
|
|
25
|
+
tags = response.json()
|
|
26
|
+
if not tags:
|
|
27
|
+
return None
|
|
28
|
+
# Tags are sorted in reverse chronological order; find the first that looks like a version
|
|
29
|
+
for tag in tags:
|
|
30
|
+
tag_name = tag.get("name", "")
|
|
31
|
+
# Accept tags of the form v0.9.10, 0.9.10, etc.
|
|
32
|
+
if tag_name.startswith("v"):
|
|
33
|
+
version_candidate = tag_name[1:]
|
|
34
|
+
else:
|
|
35
|
+
version_candidate = tag_name
|
|
36
|
+
try:
|
|
37
|
+
# validate it's a version string
|
|
38
|
+
_ = Version(version_candidate)
|
|
39
|
+
return version_candidate
|
|
40
|
+
except InvalidVersion:
|
|
41
|
+
continue
|
|
42
|
+
return None
|
|
43
|
+
except requests.exceptions.RequestException as e:
|
|
44
|
+
logging.debug("Error retrieving latest FLM version: %s", e)
|
|
45
|
+
return None
|
|
17
46
|
|
|
18
47
|
|
|
19
48
|
def check_flm_version() -> Optional[str]:
|
|
20
49
|
"""
|
|
21
50
|
Check if FLM is installed and return version, or None if not available.
|
|
22
51
|
"""
|
|
52
|
+
latest_version_str = get_flm_latest_version()
|
|
23
53
|
try:
|
|
24
54
|
result = subprocess.run(
|
|
25
55
|
["flm", "version"],
|
|
@@ -34,11 +64,11 @@ def check_flm_version() -> Optional[str]:
|
|
|
34
64
|
output = result.stdout.strip()
|
|
35
65
|
if output.startswith("FLM v"):
|
|
36
66
|
version_str = output[5:] # Remove "FLM v" prefix
|
|
37
|
-
return version_str
|
|
38
|
-
return None
|
|
67
|
+
return version_str, latest_version_str
|
|
68
|
+
return None, latest_version_str
|
|
39
69
|
|
|
40
70
|
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
41
|
-
return None
|
|
71
|
+
return None, latest_version_str
|
|
42
72
|
|
|
43
73
|
|
|
44
74
|
def refresh_environment():
|
|
@@ -76,31 +106,42 @@ def install_flm():
|
|
|
76
106
|
If not, download and run the GUI installer, then wait for completion.
|
|
77
107
|
"""
|
|
78
108
|
# Check current FLM installation
|
|
79
|
-
current_version = check_flm_version()
|
|
109
|
+
current_version, latest_version = check_flm_version()
|
|
80
110
|
|
|
81
|
-
if
|
|
111
|
+
if (
|
|
112
|
+
current_version
|
|
113
|
+
and latest_version
|
|
114
|
+
and Version(current_version) == Version(latest_version)
|
|
115
|
+
):
|
|
82
116
|
logging.info(
|
|
83
|
-
"FLM v%s is already installed and
|
|
117
|
+
"FLM v%s is already installed and is up to date (latest version: v%s).",
|
|
84
118
|
current_version,
|
|
85
|
-
|
|
119
|
+
latest_version,
|
|
86
120
|
)
|
|
87
121
|
return
|
|
88
122
|
|
|
89
123
|
if current_version:
|
|
124
|
+
if not latest_version:
|
|
125
|
+
logging.info(
|
|
126
|
+
"Unable to detect the latest FLM version; continuing with installed FLM v%s.",
|
|
127
|
+
current_version,
|
|
128
|
+
)
|
|
129
|
+
return
|
|
90
130
|
logging.info(
|
|
91
|
-
"FLM v%s is installed but below
|
|
131
|
+
"FLM v%s is installed but below latest version v%s. Upgrading...",
|
|
92
132
|
current_version,
|
|
93
|
-
|
|
133
|
+
latest_version,
|
|
94
134
|
)
|
|
135
|
+
verysilent = True
|
|
95
136
|
else:
|
|
96
|
-
logging.info(
|
|
97
|
-
|
|
98
|
-
)
|
|
137
|
+
logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
|
|
138
|
+
verysilent = False
|
|
99
139
|
|
|
100
140
|
# Download the installer
|
|
101
141
|
# pylint: disable=line-too-long
|
|
102
142
|
installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
|
|
103
143
|
installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
|
|
144
|
+
installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
|
|
104
145
|
|
|
105
146
|
try:
|
|
106
147
|
# Remove existing installer if present
|
|
@@ -123,13 +164,15 @@ def install_flm():
|
|
|
123
164
|
# Launch the installer GUI
|
|
124
165
|
logging.warning(
|
|
125
166
|
"Launching FLM installer GUI. Please complete the installation..."
|
|
167
|
+
if not verysilent
|
|
168
|
+
else "Installing FLM..."
|
|
126
169
|
)
|
|
127
170
|
|
|
128
171
|
# Launch installer and wait for it to complete
|
|
129
172
|
if os.name == "nt": # Windows
|
|
130
|
-
process = subprocess.Popen(
|
|
173
|
+
process = subprocess.Popen(installer_args, shell=True)
|
|
131
174
|
else:
|
|
132
|
-
process = subprocess.Popen(
|
|
175
|
+
process = subprocess.Popen(installer_args)
|
|
133
176
|
|
|
134
177
|
# Wait for installer to complete
|
|
135
178
|
process.wait()
|
|
@@ -150,8 +193,8 @@ def install_flm():
|
|
|
150
193
|
# Verify installation
|
|
151
194
|
max_retries = 10
|
|
152
195
|
for attempt in range(max_retries):
|
|
153
|
-
new_version = check_flm_version()
|
|
154
|
-
if new_version and Version(new_version)
|
|
196
|
+
new_version, latest_version = check_flm_version()
|
|
197
|
+
if new_version and Version(new_version) == Version(latest_version):
|
|
155
198
|
logging.info("FLM v%s successfully installed and verified", new_version)
|
|
156
199
|
return
|
|
157
200
|
|
|
@@ -240,7 +283,12 @@ def get_flm_installed_models() -> List[str]:
|
|
|
240
283
|
|
|
241
284
|
return installed_checkpoints
|
|
242
285
|
|
|
243
|
-
except (
|
|
286
|
+
except (
|
|
287
|
+
subprocess.CalledProcessError,
|
|
288
|
+
FileNotFoundError,
|
|
289
|
+
AttributeError,
|
|
290
|
+
NotADirectoryError,
|
|
291
|
+
):
|
|
244
292
|
# FLM not installed, not available, or output parsing failed
|
|
245
293
|
return []
|
|
246
294
|
|
|
@@ -249,7 +297,7 @@ def is_flm_available() -> bool:
|
|
|
249
297
|
"""
|
|
250
298
|
Check if FLM is available and meets minimum version requirements.
|
|
251
299
|
"""
|
|
252
|
-
current_version = check_flm_version()
|
|
253
|
-
return current_version is not None and Version(current_version)
|
|
254
|
-
|
|
300
|
+
current_version, latest_version = check_flm_version()
|
|
301
|
+
return current_version is not None and Version(current_version) == Version(
|
|
302
|
+
latest_version
|
|
255
303
|
)
|
|
@@ -97,6 +97,7 @@ class LoadLlamaCpp(FirstTool):
|
|
|
97
97
|
get_llama_installed_version,
|
|
98
98
|
parse_checkpoint,
|
|
99
99
|
download_gguf,
|
|
100
|
+
resolve_local_gguf_model,
|
|
100
101
|
get_local_checkpoint_path,
|
|
101
102
|
LlamaCppTokenizerAdapter,
|
|
102
103
|
LlamaCppAdapter,
|
|
@@ -169,8 +170,16 @@ class LoadLlamaCpp(FirstTool):
|
|
|
169
170
|
)
|
|
170
171
|
|
|
171
172
|
else:
|
|
173
|
+
# First, try to resolve from local cache to avoid unnecessary downloads
|
|
174
|
+
base_checkpoint, variant = parse_checkpoint(checkpoint)
|
|
175
|
+
snapshot_files = resolve_local_gguf_model(
|
|
176
|
+
base_checkpoint, variant, None
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# If not found locally, download from internet
|
|
180
|
+
if not snapshot_files:
|
|
181
|
+
snapshot_files = download_gguf(checkpoint)
|
|
172
182
|
|
|
173
|
-
snapshot_files = download_gguf(checkpoint)
|
|
174
183
|
full_model_path = snapshot_files["variant"]
|
|
175
184
|
model_to_use = os.path.basename(full_model_path)
|
|
176
185
|
|
|
@@ -10,9 +10,7 @@ import requests
|
|
|
10
10
|
import lemonade.common.build as build
|
|
11
11
|
import lemonade.common.printing as printing
|
|
12
12
|
from lemonade.tools.adapter import PassthroughTokenizer, ModelAdapter
|
|
13
|
-
|
|
14
13
|
from lemonade.common.system_info import get_system_info
|
|
15
|
-
|
|
16
14
|
from dotenv import set_key, load_dotenv
|
|
17
15
|
|
|
18
16
|
LLAMA_VERSION_VULKAN = "b6510"
|
|
@@ -378,7 +376,7 @@ def install_llamacpp(backend):
|
|
|
378
376
|
import stat
|
|
379
377
|
|
|
380
378
|
# Find and make executable files executable
|
|
381
|
-
for root,
|
|
379
|
+
for root, _, files in os.walk(llama_server_exe_dir):
|
|
382
380
|
for file in files:
|
|
383
381
|
file_path = os.path.join(root, file)
|
|
384
382
|
# Make files in bin/ directories executable
|
|
@@ -656,15 +654,91 @@ def identify_gguf_models(
|
|
|
656
654
|
return core_files, sharded_files
|
|
657
655
|
|
|
658
656
|
|
|
659
|
-
def
|
|
657
|
+
def resolve_local_gguf_model(
|
|
658
|
+
checkpoint: str, variant: str, config_mmproj: str = None
|
|
659
|
+
) -> dict | None:
|
|
660
660
|
"""
|
|
661
|
-
|
|
661
|
+
Attempts to resolve a GGUF model from the local HuggingFace cache.
|
|
662
|
+
"""
|
|
663
|
+
from huggingface_hub.constants import HF_HUB_CACHE
|
|
664
|
+
|
|
665
|
+
# Convert checkpoint to cache directory format
|
|
666
|
+
if checkpoint.startswith("models--"):
|
|
667
|
+
model_cache_dir = os.path.join(HF_HUB_CACHE, checkpoint)
|
|
668
|
+
else:
|
|
669
|
+
# This is a HuggingFace repo - convert to cache directory format
|
|
670
|
+
repo_cache_name = checkpoint.replace("/", "--")
|
|
671
|
+
model_cache_dir = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
|
|
672
|
+
|
|
673
|
+
# Check if the cache directory exists
|
|
674
|
+
if not os.path.exists(model_cache_dir):
|
|
675
|
+
return None
|
|
676
|
+
|
|
677
|
+
gguf_file_found = None
|
|
678
|
+
|
|
679
|
+
# If variant is specified, look for that specific file
|
|
680
|
+
if variant:
|
|
681
|
+
search_term = variant if variant.endswith(".gguf") else f"{variant}.gguf"
|
|
682
|
+
|
|
683
|
+
for root, _, files in os.walk(model_cache_dir):
|
|
684
|
+
if search_term in files:
|
|
685
|
+
gguf_file_found = os.path.join(root, search_term)
|
|
686
|
+
break
|
|
687
|
+
|
|
688
|
+
# If no variant or variant not found, find any .gguf file (excluding mmproj)
|
|
689
|
+
if not gguf_file_found:
|
|
690
|
+
for root, _, files in os.walk(model_cache_dir):
|
|
691
|
+
gguf_files = [
|
|
692
|
+
f for f in files if f.endswith(".gguf") and "mmproj" not in f.lower()
|
|
693
|
+
]
|
|
694
|
+
if gguf_files:
|
|
695
|
+
gguf_file_found = os.path.join(root, gguf_files[0])
|
|
696
|
+
break
|
|
697
|
+
|
|
698
|
+
# If no GGUF file found, model is not in cache
|
|
699
|
+
if not gguf_file_found:
|
|
700
|
+
return None
|
|
701
|
+
|
|
702
|
+
# Build result dictionary
|
|
703
|
+
result = {"variant": gguf_file_found}
|
|
704
|
+
|
|
705
|
+
# Search for mmproj file if provided
|
|
706
|
+
if config_mmproj:
|
|
707
|
+
for root, _, files in os.walk(model_cache_dir):
|
|
708
|
+
if config_mmproj in files:
|
|
709
|
+
result["mmproj"] = os.path.join(root, config_mmproj)
|
|
710
|
+
break
|
|
711
|
+
|
|
712
|
+
logging.info(f"Resolved local GGUF model: {result}")
|
|
713
|
+
return result
|
|
662
714
|
|
|
663
|
-
|
|
664
|
-
|
|
715
|
+
|
|
716
|
+
def download_gguf(
|
|
717
|
+
config_checkpoint: str, config_mmproj=None, do_not_upgrade: bool = False
|
|
718
|
+
) -> dict:
|
|
665
719
|
"""
|
|
720
|
+
Downloads the GGUF file for the given model configuration from HuggingFace.
|
|
721
|
+
|
|
722
|
+
This function downloads models from the internet. It does NOT check the local cache first.
|
|
723
|
+
Callers should use resolve_local_gguf_model() if they want to check for existing models first.
|
|
724
|
+
|
|
725
|
+
Args:
|
|
726
|
+
config_checkpoint: Checkpoint identifier (file path or HF repo with variant)
|
|
727
|
+
config_mmproj: Optional mmproj file to also download
|
|
728
|
+
do_not_upgrade: If True, use local cache only without attempting to download updates
|
|
666
729
|
|
|
667
|
-
|
|
730
|
+
Returns:
|
|
731
|
+
Dictionary with "variant" (and optionally "mmproj") file paths
|
|
732
|
+
"""
|
|
733
|
+
# Handle direct file path case - if the checkpoint is an actual file on disk
|
|
734
|
+
if os.path.exists(config_checkpoint):
|
|
735
|
+
result = {"variant": config_checkpoint}
|
|
736
|
+
if config_mmproj:
|
|
737
|
+
result["mmproj"] = config_mmproj
|
|
738
|
+
return result
|
|
739
|
+
|
|
740
|
+
# Parse checkpoint to extract base and variant
|
|
741
|
+
# Checkpoint format: repo_name:variant (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0")
|
|
668
742
|
checkpoint, variant = parse_checkpoint(config_checkpoint)
|
|
669
743
|
|
|
670
744
|
# Identify the GGUF model files in the repository that match the variant
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
6
|
import subprocess
|
|
7
|
-
import sys
|
|
8
7
|
import os
|
|
9
8
|
import json
|
|
10
9
|
import webbrowser
|
|
@@ -38,6 +37,17 @@ execution_providers = {
|
|
|
38
37
|
}
|
|
39
38
|
|
|
40
39
|
|
|
40
|
+
def find_onnx_files_recursively(directory):
|
|
41
|
+
"""
|
|
42
|
+
Recursively search for ONNX files in a directory and its subdirectories.
|
|
43
|
+
"""
|
|
44
|
+
for _, _, files in os.walk(directory):
|
|
45
|
+
for file in files:
|
|
46
|
+
if file.endswith(".onnx"):
|
|
47
|
+
return True
|
|
48
|
+
return False
|
|
49
|
+
|
|
50
|
+
|
|
41
51
|
def _get_npu_driver_version():
|
|
42
52
|
"""
|
|
43
53
|
Get the NPU driver version using PowerShell directly.
|
|
@@ -321,6 +331,7 @@ class OgaLoad(FirstTool):
|
|
|
321
331
|
|
|
322
332
|
@staticmethod
|
|
323
333
|
def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
|
|
334
|
+
# pylint: disable=unused-argument
|
|
324
335
|
"""
|
|
325
336
|
Sets up model dependencies for hybrid and NPU inference by:
|
|
326
337
|
1. Configuring the custom_ops_library path in genai_config.json.
|
|
@@ -328,116 +339,35 @@ class OgaLoad(FirstTool):
|
|
|
328
339
|
3. Check NPU driver version if required for device and ryzenai_version.
|
|
329
340
|
"""
|
|
330
341
|
|
|
331
|
-
|
|
342
|
+
# For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
|
|
343
|
+
if device in ["npu", "hybrid"]:
|
|
344
|
+
required_driver_version = REQUIRED_NPU_DRIVER_VERSION
|
|
332
345
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
required_driver_version = REQUIRED_NPU_DRIVER_VERSION
|
|
344
|
-
|
|
345
|
-
current_driver_version = _get_npu_driver_version()
|
|
346
|
-
|
|
347
|
-
if not current_driver_version:
|
|
348
|
-
printing.log_warning(
|
|
349
|
-
f"NPU driver not found. {device.upper()} inference requires NPU driver "
|
|
350
|
-
f"version {required_driver_version}.\n"
|
|
351
|
-
"Please download and install the NPU Driver from:\n"
|
|
352
|
-
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
353
|
-
"NPU functionality may not work properly."
|
|
354
|
-
)
|
|
355
|
-
_open_driver_install_page()
|
|
356
|
-
|
|
357
|
-
elif not _compare_driver_versions(
|
|
358
|
-
current_driver_version, required_driver_version
|
|
359
|
-
):
|
|
360
|
-
printing.log_warning(
|
|
361
|
-
f"Incorrect NPU driver version detected: {current_driver_version}\n"
|
|
362
|
-
f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
|
|
363
|
-
f"version {required_driver_version} or higher.\n"
|
|
364
|
-
"Please download and install the correct NPU Driver from:\n"
|
|
365
|
-
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
366
|
-
"NPU functionality may not work properly."
|
|
367
|
-
)
|
|
368
|
-
_open_driver_install_page()
|
|
369
|
-
|
|
370
|
-
if device == "npu":
|
|
371
|
-
# For 1.5.0, custom ops are in the conda environment's onnxruntime package
|
|
372
|
-
custom_ops_path = os.path.join(
|
|
373
|
-
env_path,
|
|
374
|
-
"Lib",
|
|
375
|
-
"site-packages",
|
|
376
|
-
"onnxruntime",
|
|
377
|
-
"capi",
|
|
378
|
-
"onnxruntime_vitis_ai_custom_ops.dll",
|
|
379
|
-
)
|
|
380
|
-
dll_source_path = os.path.join(
|
|
381
|
-
env_path, "Lib", "site-packages", "onnxruntime", "capi"
|
|
382
|
-
)
|
|
383
|
-
required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
|
|
384
|
-
else:
|
|
385
|
-
custom_ops_path = os.path.join(
|
|
386
|
-
env_path,
|
|
387
|
-
"Lib",
|
|
388
|
-
"site-packages",
|
|
389
|
-
"onnxruntime_genai",
|
|
390
|
-
"onnx_custom_ops.dll",
|
|
391
|
-
)
|
|
392
|
-
dll_source_path = os.path.join(
|
|
393
|
-
env_path, "Lib", "site-packages", "onnxruntime_genai"
|
|
394
|
-
)
|
|
395
|
-
required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
|
|
396
|
-
|
|
397
|
-
# Validate that all required DLLs exist in the source directory
|
|
398
|
-
missing_dlls = []
|
|
399
|
-
if not os.path.exists(custom_ops_path):
|
|
400
|
-
missing_dlls.append(custom_ops_path)
|
|
401
|
-
|
|
402
|
-
for dll_name in required_dlls:
|
|
403
|
-
dll_source = os.path.join(dll_source_path, dll_name)
|
|
404
|
-
if not os.path.exists(dll_source):
|
|
405
|
-
missing_dlls.append(dll_source)
|
|
406
|
-
|
|
407
|
-
if missing_dlls:
|
|
408
|
-
dll_list = "\n - ".join(missing_dlls)
|
|
409
|
-
raise RuntimeError(
|
|
410
|
-
f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
|
|
411
|
-
f"Please ensure your RyzenAI installation is complete and supports {device}."
|
|
346
|
+
current_driver_version = _get_npu_driver_version()
|
|
347
|
+
rai_version, _ = _get_ryzenai_version_info(device)
|
|
348
|
+
|
|
349
|
+
if not current_driver_version:
|
|
350
|
+
printing.log_warning(
|
|
351
|
+
f"NPU driver not found. {device.upper()} inference requires NPU driver "
|
|
352
|
+
f"version {required_driver_version}.\n"
|
|
353
|
+
"Please download and install the NPU Driver from:\n"
|
|
354
|
+
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
355
|
+
"NPU functionality may not work properly."
|
|
412
356
|
)
|
|
357
|
+
_open_driver_install_page()
|
|
413
358
|
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
if dll_source_path not in current_path:
|
|
417
|
-
os.environ["PATH"] = dll_source_path + os.pathsep + current_path
|
|
418
|
-
|
|
419
|
-
# Update the model config with custom_ops_library path
|
|
420
|
-
config_path = os.path.join(full_model_path, "genai_config.json")
|
|
421
|
-
if os.path.exists(config_path):
|
|
422
|
-
with open(config_path, "r", encoding="utf-8") as f:
|
|
423
|
-
config = json.load(f)
|
|
424
|
-
|
|
425
|
-
if (
|
|
426
|
-
"model" in config
|
|
427
|
-
and "decoder" in config["model"]
|
|
428
|
-
and "session_options" in config["model"]["decoder"]
|
|
359
|
+
elif not _compare_driver_versions(
|
|
360
|
+
current_driver_version, required_driver_version
|
|
429
361
|
):
|
|
430
|
-
|
|
431
|
-
"
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
f"Model's `genai_config.json` not found in {full_model_path}"
|
|
440
|
-
)
|
|
362
|
+
printing.log_warning(
|
|
363
|
+
f"Incorrect NPU driver version detected: {current_driver_version}\n"
|
|
364
|
+
f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
|
|
365
|
+
f"version {required_driver_version} or higher.\n"
|
|
366
|
+
"Please download and install the correct NPU Driver from:\n"
|
|
367
|
+
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
368
|
+
"NPU functionality may not work properly."
|
|
369
|
+
)
|
|
370
|
+
_open_driver_install_page()
|
|
441
371
|
|
|
442
372
|
@staticmethod
|
|
443
373
|
def _is_preoptimized_model(input_model_path):
|
|
@@ -502,34 +432,6 @@ class OgaLoad(FirstTool):
|
|
|
502
432
|
|
|
503
433
|
return full_model_path
|
|
504
434
|
|
|
505
|
-
@staticmethod
|
|
506
|
-
def _setup_npu_environment(ryzenai_version, oga_path):
|
|
507
|
-
"""
|
|
508
|
-
Sets up environment for NPU flow of ONNX model and returns saved state to be restored
|
|
509
|
-
later in cleanup.
|
|
510
|
-
"""
|
|
511
|
-
if "1.5.0" in ryzenai_version:
|
|
512
|
-
# For PyPI installation (1.5.0+), no environment setup needed
|
|
513
|
-
return None
|
|
514
|
-
elif "1.4.0" in ryzenai_version:
|
|
515
|
-
# Legacy lemonade-install approach for 1.4.0
|
|
516
|
-
if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
|
|
517
|
-
raise RuntimeError(
|
|
518
|
-
f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
|
|
519
|
-
)
|
|
520
|
-
|
|
521
|
-
# Save current state so they can be restored after inference.
|
|
522
|
-
saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
|
|
523
|
-
|
|
524
|
-
# Setup NPU environment (cwd and path will be restored later)
|
|
525
|
-
os.chdir(oga_path)
|
|
526
|
-
os.environ["PATH"] = (
|
|
527
|
-
os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
|
|
528
|
-
)
|
|
529
|
-
return saved_state
|
|
530
|
-
else:
|
|
531
|
-
raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
|
|
532
|
-
|
|
533
435
|
@staticmethod
|
|
534
436
|
def _load_model_and_setup_state(
|
|
535
437
|
state, full_model_path, checkpoint, trust_remote_code
|
|
@@ -702,8 +604,7 @@ class OgaLoad(FirstTool):
|
|
|
702
604
|
state.save_stat(Keys.CHECKPOINT, checkpoint)
|
|
703
605
|
state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
|
|
704
606
|
# See if there is a file ending in ".onnx" in this folder
|
|
705
|
-
|
|
706
|
-
has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
|
|
607
|
+
has_onnx_file = find_onnx_files_recursively(input)
|
|
707
608
|
if not has_onnx_file:
|
|
708
609
|
raise ValueError(
|
|
709
610
|
f"The folder {input} does not contain an ONNX model file."
|
|
@@ -852,15 +753,10 @@ class OgaLoad(FirstTool):
|
|
|
852
753
|
|
|
853
754
|
try:
|
|
854
755
|
if device == "npu":
|
|
855
|
-
saved_env_state = self._setup_npu_environment(
|
|
856
|
-
ryzenai_version, oga_path
|
|
857
|
-
)
|
|
858
756
|
# Set USE_AIE_RoPE based on model type
|
|
859
757
|
os.environ["USE_AIE_RoPE"] = (
|
|
860
758
|
"0" if "phi-" in checkpoint.lower() else "1"
|
|
861
759
|
)
|
|
862
|
-
elif device == "hybrid":
|
|
863
|
-
saved_env_state = None
|
|
864
760
|
|
|
865
761
|
self._load_model_and_setup_state(
|
|
866
762
|
state, full_model_path, checkpoint, trust_remote_code
|