lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cache.py +6 -1
- lemonade/cli.py +47 -5
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/status.py +4 -4
- lemonade/common/system_info.py +544 -1
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/tools/accuracy.py +143 -48
- lemonade/tools/adapter.py +6 -1
- lemonade/tools/bench.py +26 -8
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +303 -0
- lemonade/tools/huggingface/bench.py +6 -1
- lemonade/tools/llamacpp/bench.py +146 -27
- lemonade/tools/llamacpp/load.py +30 -2
- lemonade/tools/llamacpp/utils.py +393 -33
- lemonade/tools/oga/bench.py +5 -26
- lemonade/tools/oga/load.py +60 -121
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/report/table.py +76 -8
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +220 -553
- lemonade/tools/server/serve.py +684 -168
- lemonade/tools/server/static/js/chat.js +666 -342
- lemonade/tools/server/static/js/model-settings.js +24 -3
- lemonade/tools/server/static/js/models.js +597 -73
- lemonade/tools/server/static/js/shared.js +79 -14
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +491 -66
- lemonade/tools/server/static/webapp.html +83 -31
- lemonade/tools/server/tray.py +158 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +54 -611
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
- lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
- lemonade_server/cli.py +145 -37
- lemonade_server/model_manager.py +521 -37
- lemonade_server/pydantic_models.py +28 -1
- lemonade_server/server_models.json +246 -92
- lemonade_server/settings.py +39 -39
- lemonade/tools/quark/__init__.py +0 -0
- lemonade/tools/quark/quark_load.py +0 -173
- lemonade/tools/quark/quark_quantize.py +0 -439
- lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/tools/oga/bench.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import statistics
|
|
3
3
|
from statistics import StatisticsError
|
|
4
|
+
import psutil
|
|
4
5
|
from lemonade.state import State
|
|
5
|
-
from lemonade.cache import Keys
|
|
6
6
|
from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
|
|
7
7
|
from lemonade.tools.bench import Bench
|
|
8
8
|
|
|
@@ -20,16 +20,6 @@ class OgaBench(Bench):
|
|
|
20
20
|
|
|
21
21
|
unique_name = "oga-bench"
|
|
22
22
|
|
|
23
|
-
def __init__(self):
|
|
24
|
-
super().__init__()
|
|
25
|
-
|
|
26
|
-
# Additional statistics generated by this bench tool
|
|
27
|
-
self.status_stats.insert(
|
|
28
|
-
self.status_stats.index(Keys.TOKEN_GENERATION_TOKENS_PER_SECOND) + 1,
|
|
29
|
-
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
30
|
-
)
|
|
31
|
-
self.std_dev_token_generation_tokens_per_second_list = []
|
|
32
|
-
|
|
33
23
|
@staticmethod
|
|
34
24
|
def parser(add_help: bool = True) -> argparse.ArgumentParser:
|
|
35
25
|
parser = __class__.helpful_parser(
|
|
@@ -62,7 +52,7 @@ class OgaBench(Bench):
|
|
|
62
52
|
iterations: int,
|
|
63
53
|
warmup_iterations: int,
|
|
64
54
|
output_tokens: int,
|
|
65
|
-
)
|
|
55
|
+
):
|
|
66
56
|
|
|
67
57
|
model: ModelAdapter = state.model
|
|
68
58
|
tokenizer: TokenizerAdapter = state.tokenizer
|
|
@@ -120,20 +110,9 @@ class OgaBench(Bench):
|
|
|
120
110
|
except StatisticsError:
|
|
121
111
|
# Less than 2 measurements
|
|
122
112
|
self.std_dev_token_generation_tokens_per_second_list.append(None)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# Save additional statistics
|
|
128
|
-
if not all(
|
|
129
|
-
element is None
|
|
130
|
-
for element in self.std_dev_token_generation_tokens_per_second_list
|
|
131
|
-
):
|
|
132
|
-
state.save_stat(
|
|
133
|
-
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
134
|
-
self.get_item_or_list(
|
|
135
|
-
self.std_dev_token_generation_tokens_per_second_list
|
|
136
|
-
),
|
|
113
|
+
if self.save_max_memory_used:
|
|
114
|
+
self.max_memory_used_gb_list.append(
|
|
115
|
+
psutil.Process().memory_info().peak_wset / 1024**3
|
|
137
116
|
)
|
|
138
117
|
|
|
139
118
|
|
lemonade/tools/oga/load.py
CHANGED
|
@@ -38,6 +38,17 @@ execution_providers = {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def find_onnx_files_recursively(directory):
|
|
42
|
+
"""
|
|
43
|
+
Recursively search for ONNX files in a directory and its subdirectories.
|
|
44
|
+
"""
|
|
45
|
+
for _, _, files in os.walk(directory):
|
|
46
|
+
for file in files:
|
|
47
|
+
if file.endswith(".onnx"):
|
|
48
|
+
return True
|
|
49
|
+
return False
|
|
50
|
+
|
|
51
|
+
|
|
41
52
|
def _get_npu_driver_version():
|
|
42
53
|
"""
|
|
43
54
|
Get the NPU driver version using PowerShell directly.
|
|
@@ -74,6 +85,17 @@ def _get_npu_driver_version():
|
|
|
74
85
|
return None
|
|
75
86
|
|
|
76
87
|
|
|
88
|
+
def _compare_driver_versions(current_version, required_version):
|
|
89
|
+
"""
|
|
90
|
+
Compare two driver version strings.
|
|
91
|
+
Returns True if current_version >= required_version, False otherwise.
|
|
92
|
+
Uses packaging.version for proper semantic version comparison.
|
|
93
|
+
"""
|
|
94
|
+
from packaging.version import Version
|
|
95
|
+
|
|
96
|
+
return Version(current_version) >= Version(required_version)
|
|
97
|
+
|
|
98
|
+
|
|
77
99
|
def import_error_heler(e: Exception):
|
|
78
100
|
"""
|
|
79
101
|
Print a helpful message in the event of an import error
|
|
@@ -310,6 +332,7 @@ class OgaLoad(FirstTool):
|
|
|
310
332
|
|
|
311
333
|
@staticmethod
|
|
312
334
|
def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
|
|
335
|
+
# pylint: disable=unused-argument
|
|
313
336
|
"""
|
|
314
337
|
Sets up model dependencies for hybrid and NPU inference by:
|
|
315
338
|
1. Configuring the custom_ops_library path in genai_config.json.
|
|
@@ -317,74 +340,45 @@ class OgaLoad(FirstTool):
|
|
|
317
340
|
3. Check NPU driver version if required for device and ryzenai_version.
|
|
318
341
|
"""
|
|
319
342
|
|
|
320
|
-
|
|
343
|
+
# For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
|
|
344
|
+
if device in ["npu", "hybrid"]:
|
|
345
|
+
required_driver_version = REQUIRED_NPU_DRIVER_VERSION
|
|
321
346
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
required_driver_version = REQUIRED_NPU_DRIVER_VERSION
|
|
333
|
-
|
|
334
|
-
current_driver_version = _get_npu_driver_version()
|
|
335
|
-
|
|
336
|
-
if not current_driver_version:
|
|
337
|
-
printing.log_warning(
|
|
338
|
-
f"NPU driver not found. {device.upper()} inference requires NPU driver "
|
|
339
|
-
f"version {required_driver_version}.\n"
|
|
340
|
-
"Please download and install the NPU Driver from:\n"
|
|
341
|
-
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
342
|
-
"NPU functionality may not work properly."
|
|
343
|
-
)
|
|
344
|
-
_open_driver_install_page()
|
|
345
|
-
|
|
346
|
-
elif current_driver_version != required_driver_version:
|
|
347
|
-
printing.log_warning(
|
|
348
|
-
f"Incorrect NPU driver version detected: {current_driver_version}\n"
|
|
349
|
-
f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
|
|
350
|
-
f"version {required_driver_version}.\n"
|
|
351
|
-
"Please download and install the correct NPU Driver from:\n"
|
|
352
|
-
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
353
|
-
"NPU functionality may not work properly."
|
|
354
|
-
)
|
|
355
|
-
_open_driver_install_page()
|
|
356
|
-
|
|
357
|
-
if device == "npu":
|
|
358
|
-
# For 1.5.0, custom ops are in the conda environment's onnxruntime package
|
|
359
|
-
custom_ops_path = os.path.join(
|
|
360
|
-
env_path,
|
|
361
|
-
"Lib",
|
|
362
|
-
"site-packages",
|
|
363
|
-
"onnxruntime",
|
|
364
|
-
"capi",
|
|
365
|
-
"onnxruntime_vitis_ai_custom_ops.dll",
|
|
366
|
-
)
|
|
367
|
-
dll_source_path = os.path.join(
|
|
368
|
-
env_path, "Lib", "site-packages", "onnxruntime", "capi"
|
|
369
|
-
)
|
|
370
|
-
required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
|
|
371
|
-
else:
|
|
372
|
-
custom_ops_path = os.path.join(
|
|
373
|
-
env_path,
|
|
374
|
-
"Lib",
|
|
375
|
-
"site-packages",
|
|
376
|
-
"onnxruntime_genai",
|
|
377
|
-
"onnx_custom_ops.dll",
|
|
347
|
+
current_driver_version = _get_npu_driver_version()
|
|
348
|
+
rai_version, _ = _get_ryzenai_version_info(device)
|
|
349
|
+
|
|
350
|
+
if not current_driver_version:
|
|
351
|
+
printing.log_warning(
|
|
352
|
+
f"NPU driver not found. {device.upper()} inference requires NPU driver "
|
|
353
|
+
f"version {required_driver_version}.\n"
|
|
354
|
+
"Please download and install the NPU Driver from:\n"
|
|
355
|
+
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
356
|
+
"NPU functionality may not work properly."
|
|
378
357
|
)
|
|
379
|
-
|
|
380
|
-
|
|
358
|
+
_open_driver_install_page()
|
|
359
|
+
|
|
360
|
+
elif not _compare_driver_versions(
|
|
361
|
+
current_driver_version, required_driver_version
|
|
362
|
+
):
|
|
363
|
+
printing.log_warning(
|
|
364
|
+
f"Incorrect NPU driver version detected: {current_driver_version}\n"
|
|
365
|
+
f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
|
|
366
|
+
f"version {required_driver_version} or higher.\n"
|
|
367
|
+
"Please download and install the correct NPU Driver from:\n"
|
|
368
|
+
f"{NPU_DRIVER_DOWNLOAD_URL}\n"
|
|
369
|
+
"NPU functionality may not work properly."
|
|
381
370
|
)
|
|
382
|
-
|
|
371
|
+
_open_driver_install_page()
|
|
372
|
+
|
|
373
|
+
# Setup DLL paths for NPU/hybrid inference
|
|
374
|
+
env_path = os.path.dirname(sys.executable)
|
|
375
|
+
dll_source_path = os.path.join(
|
|
376
|
+
env_path, "Lib", "site-packages", "onnxruntime_genai"
|
|
377
|
+
)
|
|
378
|
+
required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
|
|
383
379
|
|
|
384
380
|
# Validate that all required DLLs exist in the source directory
|
|
385
381
|
missing_dlls = []
|
|
386
|
-
if not os.path.exists(custom_ops_path):
|
|
387
|
-
missing_dlls.append(custom_ops_path)
|
|
388
382
|
|
|
389
383
|
for dll_name in required_dlls:
|
|
390
384
|
dll_source = os.path.join(dll_source_path, dll_name)
|
|
@@ -395,7 +389,9 @@ class OgaLoad(FirstTool):
|
|
|
395
389
|
dll_list = "\n - ".join(missing_dlls)
|
|
396
390
|
raise RuntimeError(
|
|
397
391
|
f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
|
|
398
|
-
f"Please ensure your RyzenAI installation is complete and supports {device}
|
|
392
|
+
f"Please ensure your RyzenAI installation is complete and supports {device}.\n"
|
|
393
|
+
"Please reinstall the RyzenAI Software for your platform. Run:\n"
|
|
394
|
+
" pip install lemonade-sdk[oga-ryzenai]\n"
|
|
399
395
|
)
|
|
400
396
|
|
|
401
397
|
# Add the DLL source directory to PATH
|
|
@@ -403,29 +399,6 @@ class OgaLoad(FirstTool):
|
|
|
403
399
|
if dll_source_path not in current_path:
|
|
404
400
|
os.environ["PATH"] = dll_source_path + os.pathsep + current_path
|
|
405
401
|
|
|
406
|
-
# Update the model config with custom_ops_library path
|
|
407
|
-
config_path = os.path.join(full_model_path, "genai_config.json")
|
|
408
|
-
if os.path.exists(config_path):
|
|
409
|
-
with open(config_path, "r", encoding="utf-8") as f:
|
|
410
|
-
config = json.load(f)
|
|
411
|
-
|
|
412
|
-
if (
|
|
413
|
-
"model" in config
|
|
414
|
-
and "decoder" in config["model"]
|
|
415
|
-
and "session_options" in config["model"]["decoder"]
|
|
416
|
-
):
|
|
417
|
-
config["model"]["decoder"]["session_options"][
|
|
418
|
-
"custom_ops_library"
|
|
419
|
-
] = custom_ops_path
|
|
420
|
-
|
|
421
|
-
with open(config_path, "w", encoding="utf-8") as f:
|
|
422
|
-
json.dump(config, f, indent=4)
|
|
423
|
-
|
|
424
|
-
else:
|
|
425
|
-
printing.log_info(
|
|
426
|
-
f"Model's `genai_config.json` not found in {full_model_path}"
|
|
427
|
-
)
|
|
428
|
-
|
|
429
402
|
@staticmethod
|
|
430
403
|
def _is_preoptimized_model(input_model_path):
|
|
431
404
|
"""
|
|
@@ -489,34 +462,6 @@ class OgaLoad(FirstTool):
|
|
|
489
462
|
|
|
490
463
|
return full_model_path
|
|
491
464
|
|
|
492
|
-
@staticmethod
|
|
493
|
-
def _setup_npu_environment(ryzenai_version, oga_path):
|
|
494
|
-
"""
|
|
495
|
-
Sets up environment for NPU flow of ONNX model and returns saved state to be restored
|
|
496
|
-
later in cleanup.
|
|
497
|
-
"""
|
|
498
|
-
if "1.5.0" in ryzenai_version:
|
|
499
|
-
# For PyPI installation (1.5.0+), no environment setup needed
|
|
500
|
-
return None
|
|
501
|
-
elif "1.4.0" in ryzenai_version:
|
|
502
|
-
# Legacy lemonade-install approach for 1.4.0
|
|
503
|
-
if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
|
|
504
|
-
raise RuntimeError(
|
|
505
|
-
f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
|
|
506
|
-
)
|
|
507
|
-
|
|
508
|
-
# Save current state so they can be restored after inference.
|
|
509
|
-
saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
|
|
510
|
-
|
|
511
|
-
# Setup NPU environment (cwd and path will be restored later)
|
|
512
|
-
os.chdir(oga_path)
|
|
513
|
-
os.environ["PATH"] = (
|
|
514
|
-
os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
|
|
515
|
-
)
|
|
516
|
-
return saved_state
|
|
517
|
-
else:
|
|
518
|
-
raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
|
|
519
|
-
|
|
520
465
|
@staticmethod
|
|
521
466
|
def _load_model_and_setup_state(
|
|
522
467
|
state, full_model_path, checkpoint, trust_remote_code
|
|
@@ -689,8 +634,7 @@ class OgaLoad(FirstTool):
|
|
|
689
634
|
state.save_stat(Keys.CHECKPOINT, checkpoint)
|
|
690
635
|
state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
|
|
691
636
|
# See if there is a file ending in ".onnx" in this folder
|
|
692
|
-
|
|
693
|
-
has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
|
|
637
|
+
has_onnx_file = find_onnx_files_recursively(input)
|
|
694
638
|
if not has_onnx_file:
|
|
695
639
|
raise ValueError(
|
|
696
640
|
f"The folder {input} does not contain an ONNX model file."
|
|
@@ -839,15 +783,10 @@ class OgaLoad(FirstTool):
|
|
|
839
783
|
|
|
840
784
|
try:
|
|
841
785
|
if device == "npu":
|
|
842
|
-
saved_env_state = self._setup_npu_environment(
|
|
843
|
-
ryzenai_version, oga_path
|
|
844
|
-
)
|
|
845
786
|
# Set USE_AIE_RoPE based on model type
|
|
846
787
|
os.environ["USE_AIE_RoPE"] = (
|
|
847
788
|
"0" if "phi-" in checkpoint.lower() else "1"
|
|
848
789
|
)
|
|
849
|
-
elif device == "hybrid":
|
|
850
|
-
saved_env_state = None
|
|
851
790
|
|
|
852
791
|
self._load_model_and_setup_state(
|
|
853
792
|
state, full_model_path, checkpoint, trust_remote_code
|