lemonade-sdk 8.1.12__tar.gz → 8.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.
Files changed (91)
  1. {lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info → lemonade_sdk-8.2.0}/PKG-INFO +4 -3
  2. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.py +3 -2
  3. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/system_info.py +0 -26
  4. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/utils.py +70 -22
  5. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/load.py +10 -1
  6. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/utils.py +82 -8
  7. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/load.py +38 -142
  8. lemonade_sdk-8.2.0/src/lemonade/tools/oga/migration.py +403 -0
  9. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/llamacpp.py +20 -1
  10. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/serve.py +334 -16
  11. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/models.js +416 -18
  12. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/shared.js +41 -4
  13. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/styles.css +204 -0
  14. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/webapp.html +32 -0
  15. lemonade_sdk-8.2.0/src/lemonade/version.py +1 -0
  16. lemonade_sdk-8.2.0/src/lemonade_install/install.py +239 -0
  17. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0/src/lemonade_sdk.egg-info}/PKG-INFO +4 -3
  18. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/SOURCES.txt +1 -0
  19. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/requires.txt +3 -2
  20. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/cli.py +10 -0
  21. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/model_manager.py +172 -11
  22. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/server_models.json +94 -71
  23. lemonade_sdk-8.1.12/src/lemonade/version.py +0 -1
  24. lemonade_sdk-8.1.12/src/lemonade_install/install.py +0 -785
  25. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/LICENSE +0 -0
  26. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/NOTICE.md +0 -0
  27. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/README.md +0 -0
  28. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/pyproject.toml +0 -0
  29. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.cfg +0 -0
  30. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/__init__.py +0 -0
  31. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/api.py +0 -0
  32. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cache.py +0 -0
  33. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/cli.py +0 -0
  34. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/__init__.py +0 -0
  35. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/build.py +0 -0
  36. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/cli_helpers.py +0 -0
  37. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/exceptions.py +0 -0
  38. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/filesystem.py +0 -0
  39. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/inference_engines.py +0 -0
  40. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/network.py +0 -0
  41. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/printing.py +0 -0
  42. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/status.py +0 -0
  43. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/test_helpers.py +0 -0
  44. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/__init__.py +0 -0
  45. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/agt_power.py +0 -0
  46. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/hwinfo_power.py +0 -0
  47. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/memory_tracker.py +0 -0
  48. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/profilers/profiler.py +0 -0
  49. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/sequence.py +0 -0
  50. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/state.py +0 -0
  51. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/__init__.py +0 -0
  52. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/accuracy.py +0 -0
  53. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/adapter.py +0 -0
  54. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/bench.py +0 -0
  55. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/__init__.py +0 -0
  56. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/bench.py +0 -0
  57. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/load.py +0 -0
  58. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/huggingface/utils.py +0 -0
  59. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/humaneval.py +0 -0
  60. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/bench.py +0 -0
  61. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/management_tools.py +0 -0
  62. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/mmlu.py +0 -0
  63. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/__init__.py +0 -0
  64. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/bench.py +0 -0
  65. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/utils.py +0 -0
  66. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/perplexity.py +0 -0
  67. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/prompt.py +0 -0
  68. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/__init__.py +0 -0
  69. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/llm_report.py +0 -0
  70. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/report/table.py +0 -0
  71. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/__init__.py +0 -0
  72. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/flm.py +0 -0
  73. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/favicon.ico +0 -0
  74. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/chat.js +0 -0
  75. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/js/model-settings.js +0 -0
  76. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/static/logs.html +0 -0
  77. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tool_calls.py +0 -0
  78. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/tray.py +0 -0
  79. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/macos_tray.py +0 -0
  80. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/port.py +0 -0
  81. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/thread.py +0 -0
  82. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/utils/windows_tray.py +0 -0
  83. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/webapp.py +0 -0
  84. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/server/wrapped_server.py +0 -0
  85. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/tool.py +0 -0
  86. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_install/__init__.py +0 -0
  87. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  88. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  89. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
  90. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/pydantic_models.py +0 -0
  91. {lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade_server/settings.py +0 -0
{lemonade_sdk-8.1.12/src/lemonade_sdk.egg-info → lemonade_sdk-8.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.1.12
+Version: 8.2.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.14
@@ -29,12 +29,13 @@ Requires-Dist: tabulate
 Requires-Dist: sentencepiece
 Requires-Dist: huggingface-hub[hf_xet]==0.33.0
 Requires-Dist: python-dotenv
+Requires-Dist: python-multipart
 Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
 Provides-Extra: oga-ryzenai
-Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.9.2; extra == "oga-ryzenai"
 Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
-Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+Requires-Dist: onnxruntime-genai==0.9.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
 Provides-Extra: dev
 Requires-Dist: torch>=2.6.0; extra == "dev"
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/setup.py

@@ -49,6 +49,7 @@ setup(
         "sentencepiece",
         "huggingface-hub[hf_xet]==0.33.0",
         "python-dotenv",
+        "python-multipart",
         # macOS-specific dependencies
         "rumps>=0.4.0; sys_platform == 'darwin'",
     ],
@@ -57,11 +58,11 @@ setup(
         # applications, without including developer-focused tools
         # Primary NPU extra using unified PyPI package
         "oga-ryzenai": [
-            "onnxruntime-genai-directml-ryzenai==0.7.0.2.1",
+            "onnxruntime-genai-directml-ryzenai==0.9.2",
            "protobuf>=6.30.1",
         ],
         "oga-cpu": [
-            "onnxruntime-genai==0.8.2",
+            "onnxruntime-genai==0.9.2",
            "onnxruntime >=1.22.0",
         ],
         # Developer-focused tools for benchmarking, accuracy testing, and
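
One way to confirm the bumped pins after upgrading — an editor's sketch (not part of the package) using only the Python standard library, with package names taken from the hunks above:

    # Editor's sketch: report installed versions of the dependencies
    # whose pins changed in 8.2.0.
    from importlib.metadata import PackageNotFoundError, version

    for pkg in ("lemonade-sdk", "python-multipart", "onnxruntime-genai"):
        try:
            print(f"{pkg}: {version(pkg)}")
        except PackageNotFoundError:
            print(f"{pkg}: not installed")  # extras may be absent by design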
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/common/system_info.py

@@ -1110,32 +1110,6 @@ class LinuxSystemInfo(SystemInfo):
 
         return ""
 
-    def _get_nvidia_vram_smi_linux(self) -> float:
-        """
-        Get NVIDIA GPU VRAM on Linux using nvidia-smi command.
-
-        Returns:
-            float: VRAM in GB, or 0.0 if detection fails
-        """
-        try:
-            output = (
-                subprocess.check_output(
-                    "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
-                    shell=True,
-                    stderr=subprocess.DEVNULL,
-                )
-                .decode()
-                .strip()
-            )
-
-            # nvidia-smi returns memory in MB
-            vram_mb = int(output.split("\n")[0])
-            vram_gb = round(vram_mb / 1024, 1)
-            return vram_gb
-        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
-            pass
-        return 0.0
-
     @staticmethod
     def get_processor_name() -> str:
         """
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/flm/utils.py

@@ -10,16 +10,46 @@ import time
 from typing import List, Optional
 
 import requests
-from packaging.version import Version
+from packaging.version import Version, InvalidVersion
 
 
-FLM_MINIMUM_VERSION = "0.9.12"
+def get_flm_latest_version() -> Optional[str]:
+    """
+    Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags"
+    This uses the GitHub tags API.
+    """
+    url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        tags = response.json()
+        if not tags:
+            return None
+        # Tags are sorted in reverse chronological order; find the first that looks like a version
+        for tag in tags:
+            tag_name = tag.get("name", "")
+            # Accept tags of the form v0.9.10, 0.9.10, etc.
+            if tag_name.startswith("v"):
+                version_candidate = tag_name[1:]
+            else:
+                version_candidate = tag_name
+            try:
+                # validate it's a version string
+                _ = Version(version_candidate)
+                return version_candidate
+            except InvalidVersion:
+                continue
+        return None
+    except requests.exceptions.RequestException as e:
+        logging.debug("Error retrieving latest FLM version: %s", e)
+        return None
 
 
 def check_flm_version() -> Optional[str]:
     """
     Check if FLM is installed and return version, or None if not available.
     """
+    latest_version_str = get_flm_latest_version()
     try:
         result = subprocess.run(
             ["flm", "version"],
@@ -34,11 +64,11 @@ def check_flm_version() -> Optional[str]:
         output = result.stdout.strip()
         if output.startswith("FLM v"):
             version_str = output[5:]  # Remove "FLM v" prefix
-            return version_str
-        return None
+            return version_str, latest_version_str
+        return None, latest_version_str
 
     except (subprocess.CalledProcessError, FileNotFoundError):
-        return None
+        return None, latest_version_str
 
 
 def refresh_environment():
@@ -76,31 +106,42 @@ def install_flm():
     If not, download and run the GUI installer, then wait for completion.
     """
     # Check current FLM installation
-    current_version = check_flm_version()
+    current_version, latest_version = check_flm_version()
 
-    if current_version and Version(current_version) >= Version(FLM_MINIMUM_VERSION):
+    if (
+        current_version
+        and latest_version
+        and Version(current_version) == Version(latest_version)
+    ):
         logging.info(
-            "FLM v%s is already installed and meets minimum version requirement (v%s)",
+            "FLM v%s is already installed and is up to date (latest version: v%s).",
             current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
         )
         return
 
     if current_version:
+        if not latest_version:
+            logging.info(
+                "Unable to detect the latest FLM version; continuing with installed FLM v%s.",
+                current_version,
+            )
+            return
         logging.info(
-            "FLM v%s is installed but below minimum version v%s. Upgrading...",
+            "FLM v%s is installed but below latest version v%s. Upgrading...",
             current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
        )
+        verysilent = True
     else:
-        logging.info(
-            "FLM not found. Installing FLM v%s or later...", FLM_MINIMUM_VERSION
-        )
+        logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
+        verysilent = False
 
     # Download the installer
     # pylint: disable=line-too-long
     installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
     installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
+    installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
 
     try:
         # Remove existing installer if present
@@ -123,13 +164,15 @@ def install_flm():
 
     # Launch the installer GUI
     logging.warning(
         "Launching FLM installer GUI. Please complete the installation..."
+        if not verysilent
+        else "Installing FLM..."
     )
 
     # Launch installer and wait for it to complete
     if os.name == "nt":  # Windows
-        process = subprocess.Popen([installer_path], shell=True)
+        process = subprocess.Popen(installer_args, shell=True)
     else:
-        process = subprocess.Popen([installer_path])
+        process = subprocess.Popen(installer_args)
 
     # Wait for installer to complete
     process.wait()
@@ -150,8 +193,8 @@ def install_flm():
     # Verify installation
     max_retries = 10
     for attempt in range(max_retries):
-        new_version = check_flm_version()
-        if new_version and Version(new_version) >= Version(FLM_MINIMUM_VERSION):
+        new_version, latest_version = check_flm_version()
+        if new_version and Version(new_version) == Version(latest_version):
             logging.info("FLM v%s successfully installed and verified", new_version)
             return
 
@@ -240,7 +283,12 @@ def get_flm_installed_models() -> List[str]:
 
         return installed_checkpoints
 
-    except (subprocess.CalledProcessError, FileNotFoundError, AttributeError):
+    except (
+        subprocess.CalledProcessError,
+        FileNotFoundError,
+        AttributeError,
+        NotADirectoryError,
+    ):
         # FLM not installed, not available, or output parsing failed
         return []
 
@@ -249,7 +297,7 @@ def is_flm_available() -> bool:
     """
     Check if FLM is available and meets minimum version requirements.
     """
-    current_version = check_flm_version()
-    return current_version is not None and Version(current_version) >= Version(
-        FLM_MINIMUM_VERSION
+    current_version, latest_version = check_flm_version()
+    return current_version is not None and Version(current_version) == Version(
+        latest_version
     )
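
Net effect of the flm/utils.py hunks: the fixed FLM_MINIMUM_VERSION pin is gone, check_flm_version() now returns an (installed, latest) pair, and "up to date" means equal to the newest GitHub tag. A minimal editor's sketch (not from the package) of consuming the new return value, with the import path taken from the file list above:

    # Editor's sketch: consume the (installed, latest) pair that the 8.2.0
    # check_flm_version() returns; each element is a version string or None.
    from packaging.version import Version
    from lemonade.tools.flm.utils import check_flm_version

    installed, latest = check_flm_version()
    if installed is None:
        print("FLM is not installed")
    elif latest is None:
        print(f"FLM v{installed} installed; latest version could not be determined")
    elif Version(installed) == Version(latest):
        print(f"FLM v{installed} is up to date")
    else:
        print(f"FLM v{installed} installed; v{latest} is available")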
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/load.py

@@ -97,6 +97,7 @@ class LoadLlamaCpp(FirstTool):
             get_llama_installed_version,
             parse_checkpoint,
             download_gguf,
+            resolve_local_gguf_model,
             get_local_checkpoint_path,
             LlamaCppTokenizerAdapter,
             LlamaCppAdapter,
@@ -169,8 +170,16 @@ class LoadLlamaCpp(FirstTool):
             )
 
         else:
+            # First, try to resolve from local cache to avoid unnecessary downloads
+            base_checkpoint, variant = parse_checkpoint(checkpoint)
+            snapshot_files = resolve_local_gguf_model(
+                base_checkpoint, variant, None
+            )
+
+            # If not found locally, download from internet
+            if not snapshot_files:
+                snapshot_files = download_gguf(checkpoint)
 
-            snapshot_files = download_gguf(checkpoint)
             full_model_path = snapshot_files["variant"]
             model_to_use = os.path.basename(full_model_path)
 
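The hunk above makes loading cache-first: resolve the GGUF from the local Hugging Face cache and only download on a miss. A minimal editor's sketch of the same flow (the helpers are defined in llamacpp/utils.py, diffed next; the checkpoint string reuses the repo:variant example from that file's docstring):

    # Editor's sketch of the cache-first GGUF resolution introduced in 8.2.0.
    from lemonade.tools.llamacpp.utils import (
        parse_checkpoint,
        resolve_local_gguf_model,
        download_gguf,
    )

    checkpoint = "unsloth/Qwen3-0.6B-GGUF:Q4_0"
    base, variant = parse_checkpoint(checkpoint)

    files = resolve_local_gguf_model(base, variant, None)  # local HF cache lookup
    if not files:
        files = download_gguf(checkpoint)  # cache miss: fetch from Hugging Face

    print(files["variant"])  # path to the resolved .gguf file
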
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/llamacpp/utils.py

@@ -10,9 +10,7 @@ import requests
 import lemonade.common.build as build
 import lemonade.common.printing as printing
 from lemonade.tools.adapter import PassthroughTokenizer, ModelAdapter
-
 from lemonade.common.system_info import get_system_info
-
 from dotenv import set_key, load_dotenv
 
 LLAMA_VERSION_VULKAN = "b6510"
@@ -378,7 +376,7 @@ def install_llamacpp(backend):
     import stat
 
     # Find and make executable files executable
-    for root, dirs, files in os.walk(llama_server_exe_dir):
+    for root, _, files in os.walk(llama_server_exe_dir):
         for file in files:
             file_path = os.path.join(root, file)
             # Make files in bin/ directories executable
@@ -656,15 +654,91 @@ def identify_gguf_models(
     return core_files, sharded_files
 
 
-def download_gguf(config_checkpoint, config_mmproj=None, do_not_upgrade=False) -> dict:
+def resolve_local_gguf_model(
+    checkpoint: str, variant: str, config_mmproj: str = None
+) -> dict | None:
     """
-    Downloads the GGUF file for the given model configuration.
+    Attempts to resolve a GGUF model from the local HuggingFace cache.
+    """
+    from huggingface_hub.constants import HF_HUB_CACHE
+
+    # Convert checkpoint to cache directory format
+    if checkpoint.startswith("models--"):
+        model_cache_dir = os.path.join(HF_HUB_CACHE, checkpoint)
+    else:
+        # This is a HuggingFace repo - convert to cache directory format
+        repo_cache_name = checkpoint.replace("/", "--")
+        model_cache_dir = os.path.join(HF_HUB_CACHE, f"models--{repo_cache_name}")
+
+    # Check if the cache directory exists
+    if not os.path.exists(model_cache_dir):
+        return None
+
+    gguf_file_found = None
+
+    # If variant is specified, look for that specific file
+    if variant:
+        search_term = variant if variant.endswith(".gguf") else f"{variant}.gguf"
+
+        for root, _, files in os.walk(model_cache_dir):
+            if search_term in files:
+                gguf_file_found = os.path.join(root, search_term)
+                break
+
+    # If no variant or variant not found, find any .gguf file (excluding mmproj)
+    if not gguf_file_found:
+        for root, _, files in os.walk(model_cache_dir):
+            gguf_files = [
+                f for f in files if f.endswith(".gguf") and "mmproj" not in f.lower()
+            ]
+            if gguf_files:
+                gguf_file_found = os.path.join(root, gguf_files[0])
+                break
+
+    # If no GGUF file found, model is not in cache
+    if not gguf_file_found:
+        return None
+
+    # Build result dictionary
+    result = {"variant": gguf_file_found}
+
+    # Search for mmproj file if provided
+    if config_mmproj:
+        for root, _, files in os.walk(model_cache_dir):
+            if config_mmproj in files:
+                result["mmproj"] = os.path.join(root, config_mmproj)
+                break
+
+    logging.info(f"Resolved local GGUF model: {result}")
+    return result
 
-    For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
-    will be downloaded but only the first file will be returned for loading.
+
+def download_gguf(
+    config_checkpoint: str, config_mmproj=None, do_not_upgrade: bool = False
+) -> dict:
     """
+    Downloads the GGUF file for the given model configuration from HuggingFace.
+
+    This function downloads models from the internet. It does NOT check the local cache first.
+    Callers should use resolve_local_gguf_model() if they want to check for existing models first.
+
+    Args:
+        config_checkpoint: Checkpoint identifier (file path or HF repo with variant)
+        config_mmproj: Optional mmproj file to also download
+        do_not_upgrade: If True, use local cache only without attempting to download updates
 
-    # This code handles all cases by constructing the appropriate filename or pattern
+    Returns:
+        Dictionary with "variant" (and optionally "mmproj") file paths
+    """
+    # Handle direct file path case - if the checkpoint is an actual file on disk
+    if os.path.exists(config_checkpoint):
+        result = {"variant": config_checkpoint}
+        if config_mmproj:
+            result["mmproj"] = config_mmproj
+        return result
+
+    # Parse checkpoint to extract base and variant
+    # Checkpoint format: repo_name:variant (e.g., "unsloth/Qwen3-0.6B-GGUF:Q4_0")
     checkpoint, variant = parse_checkpoint(config_checkpoint)
     # Identify the GGUF model files in the repository that match the variant
  # Identify the GGUF model files in the repository that match the variant
{lemonade_sdk-8.1.12 → lemonade_sdk-8.2.0}/src/lemonade/tools/oga/load.py

@@ -4,7 +4,6 @@
 
 import argparse
 import subprocess
-import sys
 import os
 import json
 import webbrowser
@@ -38,6 +37,17 @@ execution_providers = {
 }
 
 
+def find_onnx_files_recursively(directory):
+    """
+    Recursively search for ONNX files in a directory and its subdirectories.
+    """
+    for _, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith(".onnx"):
+                return True
+    return False
+
+
 def _get_npu_driver_version():
     """
     Get the NPU driver version using PowerShell directly.
@@ -321,6 +331,7 @@ class OgaLoad(FirstTool):
 
     @staticmethod
     def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
+        # pylint: disable=unused-argument
         """
         Sets up model dependencies for hybrid and NPU inference by:
         1. Configuring the custom_ops_library path in genai_config.json.
@@ -328,116 +339,35 @@ class OgaLoad(FirstTool):
         3. Check NPU driver version if required for device and ryzenai_version.
         """
 
-        env_path = sys.prefix
+        # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
+        if device in ["npu", "hybrid"]:
+            required_driver_version = REQUIRED_NPU_DRIVER_VERSION
 
-        if "1.4.0" in ryzenai_version:
-            if device == "npu":
-                custom_ops_path = os.path.join(
-                    oga_path, "libs", "onnxruntime_vitis_ai_custom_ops.dll"
-                )
-            else:
-                custom_ops_path = os.path.join(oga_path, "libs", "onnx_custom_ops.dll")
-        else:
-            # For 1.5.0+, check NPU driver version for NPU and hybrid devices
-            if device in ["npu", "hybrid"]:
-                required_driver_version = REQUIRED_NPU_DRIVER_VERSION
-
-                current_driver_version = _get_npu_driver_version()
-
-                if not current_driver_version:
-                    printing.log_warning(
-                        f"NPU driver not found. {device.upper()} inference requires NPU driver "
-                        f"version {required_driver_version}.\n"
-                        "Please download and install the NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-
-                elif not _compare_driver_versions(
-                    current_driver_version, required_driver_version
-                ):
-                    printing.log_warning(
-                        f"Incorrect NPU driver version detected: {current_driver_version}\n"
-                        f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
-                        f"version {required_driver_version} or higher.\n"
-                        "Please download and install the correct NPU Driver from:\n"
-                        f"{NPU_DRIVER_DOWNLOAD_URL}\n"
-                        "NPU functionality may not work properly."
-                    )
-                    _open_driver_install_page()
-
-            if device == "npu":
-                # For 1.5.0, custom ops are in the conda environment's onnxruntime package
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime",
-                    "capi",
-                    "onnxruntime_vitis_ai_custom_ops.dll",
-                )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime", "capi"
-                )
-                required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
-            else:
-                custom_ops_path = os.path.join(
-                    env_path,
-                    "Lib",
-                    "site-packages",
-                    "onnxruntime_genai",
-                    "onnx_custom_ops.dll",
-                )
-                dll_source_path = os.path.join(
-                    env_path, "Lib", "site-packages", "onnxruntime_genai"
-                )
-                required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
-
-            # Validate that all required DLLs exist in the source directory
-            missing_dlls = []
-            if not os.path.exists(custom_ops_path):
-                missing_dlls.append(custom_ops_path)
-
-            for dll_name in required_dlls:
-                dll_source = os.path.join(dll_source_path, dll_name)
-                if not os.path.exists(dll_source):
-                    missing_dlls.append(dll_source)
-
-            if missing_dlls:
-                dll_list = "\n - ".join(missing_dlls)
-                raise RuntimeError(
-                    f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
-                    f"Please ensure your RyzenAI installation is complete and supports {device}."
+            current_driver_version = _get_npu_driver_version()
+            rai_version, _ = _get_ryzenai_version_info(device)
+
+            if not current_driver_version:
+                printing.log_warning(
+                    f"NPU driver not found. {device.upper()} inference requires NPU driver "
+                    f"version {required_driver_version}.\n"
+                    "Please download and install the NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
                 )
+                _open_driver_install_page()
 
-            # Add the DLL source directory to PATH
-            current_path = os.environ.get("PATH", "")
-            if dll_source_path not in current_path:
-                os.environ["PATH"] = dll_source_path + os.pathsep + current_path
-
-            # Update the model config with custom_ops_library path
-            config_path = os.path.join(full_model_path, "genai_config.json")
-            if os.path.exists(config_path):
-                with open(config_path, "r", encoding="utf-8") as f:
-                    config = json.load(f)
-
-                if (
-                    "model" in config
-                    and "decoder" in config["model"]
-                    and "session_options" in config["model"]["decoder"]
+            elif not _compare_driver_versions(
+                current_driver_version, required_driver_version
             ):
-                    config["model"]["decoder"]["session_options"][
-                        "custom_ops_library"
-                    ] = custom_ops_path
-
-                    with open(config_path, "w", encoding="utf-8") as f:
-                        json.dump(config, f, indent=4)
-
-            else:
-                printing.log_info(
-                    f"Model's `genai_config.json` not found in {full_model_path}"
-                )
+                printing.log_warning(
+                    f"Incorrect NPU driver version detected: {current_driver_version}\n"
+                    f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
+                    f"version {required_driver_version} or higher.\n"
+                    "Please download and install the correct NPU Driver from:\n"
+                    f"{NPU_DRIVER_DOWNLOAD_URL}\n"
+                    "NPU functionality may not work properly."
+                )
+                _open_driver_install_page()
 
     @staticmethod
     def _is_preoptimized_model(input_model_path):
@@ -502,34 +432,6 @@ class OgaLoad(FirstTool):
 
         return full_model_path
 
-    @staticmethod
-    def _setup_npu_environment(ryzenai_version, oga_path):
-        """
-        Sets up environment for NPU flow of ONNX model and returns saved state to be restored
-        later in cleanup.
-        """
-        if "1.5.0" in ryzenai_version:
-            # For PyPI installation (1.5.0+), no environment setup needed
-            return None
-        elif "1.4.0" in ryzenai_version:
-            # Legacy lemonade-install approach for 1.4.0
-            if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
-                raise RuntimeError(
-                    f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
-                )
-
-            # Save current state so they can be restored after inference.
-            saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
-
-            # Setup NPU environment (cwd and path will be restored later)
-            os.chdir(oga_path)
-            os.environ["PATH"] = (
-                os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
-            )
-            return saved_state
-        else:
-            raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
-
     @staticmethod
     def _load_model_and_setup_state(
         state, full_model_path, checkpoint, trust_remote_code
@@ -702,8 +604,7 @@ class OgaLoad(FirstTool):
         state.save_stat(Keys.CHECKPOINT, checkpoint)
         state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
         # See if there is a file ending in ".onnx" in this folder
-        dir = os.listdir(input)
-        has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
+        has_onnx_file = find_onnx_files_recursively(input)
         if not has_onnx_file:
             raise ValueError(
                 f"The folder {input} does not contain an ONNX model file."
@@ -852,15 +753,10 @@ class OgaLoad(FirstTool):
 
         try:
             if device == "npu":
-                saved_env_state = self._setup_npu_environment(
-                    ryzenai_version, oga_path
-                )
                 # Set USE_AIE_RoPE based on model type
                 os.environ["USE_AIE_RoPE"] = (
                     "0" if "phi-" in checkpoint.lower() else "1"
                 )
-            elif device == "hybrid":
-                saved_env_state = None
 
             self._load_model_and_setup_state(
                 state, full_model_path, checkpoint, trust_remote_code