lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
1
  import argparse
2
2
  import statistics
3
3
  from statistics import StatisticsError
4
+ import psutil
4
5
  from lemonade.state import State
5
- from lemonade.cache import Keys
6
6
  from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
7
7
  from lemonade.tools.bench import Bench
8
8
 
@@ -20,16 +20,6 @@ class OgaBench(Bench):
20
20
 
21
21
  unique_name = "oga-bench"
22
22
 
23
- def __init__(self):
24
- super().__init__()
25
-
26
- # Additional statistics generated by this bench tool
27
- self.status_stats.insert(
28
- self.status_stats.index(Keys.TOKEN_GENERATION_TOKENS_PER_SECOND) + 1,
29
- Keys.STD_DEV_TOKENS_PER_SECOND,
30
- )
31
- self.std_dev_token_generation_tokens_per_second_list = []
32
-
33
23
  @staticmethod
34
24
  def parser(add_help: bool = True) -> argparse.ArgumentParser:
35
25
  parser = __class__.helpful_parser(
@@ -62,7 +52,7 @@ class OgaBench(Bench):
62
52
  iterations: int,
63
53
  warmup_iterations: int,
64
54
  output_tokens: int,
65
- ) -> State:
55
+ ):
66
56
 
67
57
  model: ModelAdapter = state.model
68
58
  tokenizer: TokenizerAdapter = state.tokenizer
@@ -120,20 +110,9 @@ class OgaBench(Bench):
120
110
  except StatisticsError:
121
111
  # Less than 2 measurements
122
112
  self.std_dev_token_generation_tokens_per_second_list.append(None)
123
-
124
- def save_stats(self, state):
125
- super().save_stats(state)
126
-
127
- # Save additional statistics
128
- if not all(
129
- element is None
130
- for element in self.std_dev_token_generation_tokens_per_second_list
131
- ):
132
- state.save_stat(
133
- Keys.STD_DEV_TOKENS_PER_SECOND,
134
- self.get_item_or_list(
135
- self.std_dev_token_generation_tokens_per_second_list
136
- ),
113
+ if self.save_max_memory_used:
114
+ self.max_memory_used_gb_list.append(
115
+ psutil.Process().memory_info().peak_wset / 1024**3
137
116
  )
138
117
 
139
118
 
@@ -38,6 +38,17 @@ execution_providers = {
38
38
  }
39
39
 
40
40
 
41
+ def find_onnx_files_recursively(directory):
42
+ """
43
+ Recursively search for ONNX files in a directory and its subdirectories.
44
+ """
45
+ for _, _, files in os.walk(directory):
46
+ for file in files:
47
+ if file.endswith(".onnx"):
48
+ return True
49
+ return False
50
+
51
+
41
52
  def _get_npu_driver_version():
42
53
  """
43
54
  Get the NPU driver version using PowerShell directly.
@@ -74,6 +85,17 @@ def _get_npu_driver_version():
74
85
  return None
75
86
 
76
87
 
88
+ def _compare_driver_versions(current_version, required_version):
89
+ """
90
+ Compare two driver version strings.
91
+ Returns True if current_version >= required_version, False otherwise.
92
+ Uses packaging.version for proper semantic version comparison.
93
+ """
94
+ from packaging.version import Version
95
+
96
+ return Version(current_version) >= Version(required_version)
97
+
98
+
77
99
  def import_error_heler(e: Exception):
78
100
  """
79
101
  Print a helpful message in the event of an import error
@@ -310,6 +332,7 @@ class OgaLoad(FirstTool):
310
332
 
311
333
  @staticmethod
312
334
  def _setup_model_dependencies(full_model_path, device, ryzenai_version, oga_path):
335
+ # pylint: disable=unused-argument
313
336
  """
314
337
  Sets up model dependencies for hybrid and NPU inference by:
315
338
  1. Configuring the custom_ops_library path in genai_config.json.
@@ -317,74 +340,45 @@ class OgaLoad(FirstTool):
317
340
  3. Check NPU driver version if required for device and ryzenai_version.
318
341
  """
319
342
 
320
- env_path = sys.prefix
343
+ # For RyzenAI 1.6.0, check NPU driver version for NPU and hybrid devices
344
+ if device in ["npu", "hybrid"]:
345
+ required_driver_version = REQUIRED_NPU_DRIVER_VERSION
321
346
 
322
- if "1.4.0" in ryzenai_version:
323
- if device == "npu":
324
- custom_ops_path = os.path.join(
325
- oga_path, "libs", "onnxruntime_vitis_ai_custom_ops.dll"
326
- )
327
- else:
328
- custom_ops_path = os.path.join(oga_path, "libs", "onnx_custom_ops.dll")
329
- else:
330
- # For 1.5.0+, check NPU driver version for NPU and hybrid devices
331
- if device in ["npu", "hybrid"]:
332
- required_driver_version = REQUIRED_NPU_DRIVER_VERSION
333
-
334
- current_driver_version = _get_npu_driver_version()
335
-
336
- if not current_driver_version:
337
- printing.log_warning(
338
- f"NPU driver not found. {device.upper()} inference requires NPU driver "
339
- f"version {required_driver_version}.\n"
340
- "Please download and install the NPU Driver from:\n"
341
- f"{NPU_DRIVER_DOWNLOAD_URL}\n"
342
- "NPU functionality may not work properly."
343
- )
344
- _open_driver_install_page()
345
-
346
- elif current_driver_version != required_driver_version:
347
- printing.log_warning(
348
- f"Incorrect NPU driver version detected: {current_driver_version}\n"
349
- f"{device.upper()} inference with RyzenAI 1.5.0 requires driver "
350
- f"version {required_driver_version}.\n"
351
- "Please download and install the correct NPU Driver from:\n"
352
- f"{NPU_DRIVER_DOWNLOAD_URL}\n"
353
- "NPU functionality may not work properly."
354
- )
355
- _open_driver_install_page()
356
-
357
- if device == "npu":
358
- # For 1.5.0, custom ops are in the conda environment's onnxruntime package
359
- custom_ops_path = os.path.join(
360
- env_path,
361
- "Lib",
362
- "site-packages",
363
- "onnxruntime",
364
- "capi",
365
- "onnxruntime_vitis_ai_custom_ops.dll",
366
- )
367
- dll_source_path = os.path.join(
368
- env_path, "Lib", "site-packages", "onnxruntime", "capi"
369
- )
370
- required_dlls = ["dyn_dispatch_core.dll", "xaiengine.dll"]
371
- else:
372
- custom_ops_path = os.path.join(
373
- env_path,
374
- "Lib",
375
- "site-packages",
376
- "onnxruntime_genai",
377
- "onnx_custom_ops.dll",
347
+ current_driver_version = _get_npu_driver_version()
348
+ rai_version, _ = _get_ryzenai_version_info(device)
349
+
350
+ if not current_driver_version:
351
+ printing.log_warning(
352
+ f"NPU driver not found. {device.upper()} inference requires NPU driver "
353
+ f"version {required_driver_version}.\n"
354
+ "Please download and install the NPU Driver from:\n"
355
+ f"{NPU_DRIVER_DOWNLOAD_URL}\n"
356
+ "NPU functionality may not work properly."
378
357
  )
379
- dll_source_path = os.path.join(
380
- env_path, "Lib", "site-packages", "onnxruntime_genai"
358
+ _open_driver_install_page()
359
+
360
+ elif not _compare_driver_versions(
361
+ current_driver_version, required_driver_version
362
+ ):
363
+ printing.log_warning(
364
+ f"Incorrect NPU driver version detected: {current_driver_version}\n"
365
+ f"{device.upper()} inference with RyzenAI {rai_version} requires driver "
366
+ f"version {required_driver_version} or higher.\n"
367
+ "Please download and install the correct NPU Driver from:\n"
368
+ f"{NPU_DRIVER_DOWNLOAD_URL}\n"
369
+ "NPU functionality may not work properly."
381
370
  )
382
- required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
371
+ _open_driver_install_page()
372
+
373
+ # Setup DLL paths for NPU/hybrid inference
374
+ env_path = os.path.dirname(sys.executable)
375
+ dll_source_path = os.path.join(
376
+ env_path, "Lib", "site-packages", "onnxruntime_genai"
377
+ )
378
+ required_dlls = ["libutf8_validity.dll", "abseil_dll.dll"]
383
379
 
384
380
  # Validate that all required DLLs exist in the source directory
385
381
  missing_dlls = []
386
- if not os.path.exists(custom_ops_path):
387
- missing_dlls.append(custom_ops_path)
388
382
 
389
383
  for dll_name in required_dlls:
390
384
  dll_source = os.path.join(dll_source_path, dll_name)
@@ -395,7 +389,9 @@ class OgaLoad(FirstTool):
395
389
  dll_list = "\n - ".join(missing_dlls)
396
390
  raise RuntimeError(
397
391
  f"Required DLLs not found for {device} inference:\n - {dll_list}\n"
398
- f"Please ensure your RyzenAI installation is complete and supports {device}."
392
+ f"Please ensure your RyzenAI installation is complete and supports {device}.\n"
393
+ "Please reinstall the RyzenAI Software for your platform. Run:\n"
394
+ " pip install lemonade-sdk[oga-ryzenai]\n"
399
395
  )
400
396
 
401
397
  # Add the DLL source directory to PATH
@@ -403,29 +399,6 @@ class OgaLoad(FirstTool):
403
399
  if dll_source_path not in current_path:
404
400
  os.environ["PATH"] = dll_source_path + os.pathsep + current_path
405
401
 
406
- # Update the model config with custom_ops_library path
407
- config_path = os.path.join(full_model_path, "genai_config.json")
408
- if os.path.exists(config_path):
409
- with open(config_path, "r", encoding="utf-8") as f:
410
- config = json.load(f)
411
-
412
- if (
413
- "model" in config
414
- and "decoder" in config["model"]
415
- and "session_options" in config["model"]["decoder"]
416
- ):
417
- config["model"]["decoder"]["session_options"][
418
- "custom_ops_library"
419
- ] = custom_ops_path
420
-
421
- with open(config_path, "w", encoding="utf-8") as f:
422
- json.dump(config, f, indent=4)
423
-
424
- else:
425
- printing.log_info(
426
- f"Model's `genai_config.json` not found in {full_model_path}"
427
- )
428
-
429
402
  @staticmethod
430
403
  def _is_preoptimized_model(input_model_path):
431
404
  """
@@ -489,34 +462,6 @@ class OgaLoad(FirstTool):
489
462
 
490
463
  return full_model_path
491
464
 
492
- @staticmethod
493
- def _setup_npu_environment(ryzenai_version, oga_path):
494
- """
495
- Sets up environment for NPU flow of ONNX model and returns saved state to be restored
496
- later in cleanup.
497
- """
498
- if "1.5.0" in ryzenai_version:
499
- # For PyPI installation (1.5.0+), no environment setup needed
500
- return None
501
- elif "1.4.0" in ryzenai_version:
502
- # Legacy lemonade-install approach for 1.4.0
503
- if not os.path.exists(os.path.join(oga_path, "libs", "onnxruntime.dll")):
504
- raise RuntimeError(
505
- f"Cannot find libs/onnxruntime.dll in lib folder: {oga_path}"
506
- )
507
-
508
- # Save current state so they can be restored after inference.
509
- saved_state = {"cwd": os.getcwd(), "path": os.environ["PATH"]}
510
-
511
- # Setup NPU environment (cwd and path will be restored later)
512
- os.chdir(oga_path)
513
- os.environ["PATH"] = (
514
- os.path.join(oga_path, "libs") + os.pathsep + os.environ["PATH"]
515
- )
516
- return saved_state
517
- else:
518
- raise ValueError(f"Unsupported RyzenAI version: {ryzenai_version}")
519
-
520
465
  @staticmethod
521
466
  def _load_model_and_setup_state(
522
467
  state, full_model_path, checkpoint, trust_remote_code
@@ -689,8 +634,7 @@ class OgaLoad(FirstTool):
689
634
  state.save_stat(Keys.CHECKPOINT, checkpoint)
690
635
  state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
691
636
  # See if there is a file ending in ".onnx" in this folder
692
- dir = os.listdir(input)
693
- has_onnx_file = any([filename.endswith(".onnx") for filename in dir])
637
+ has_onnx_file = find_onnx_files_recursively(input)
694
638
  if not has_onnx_file:
695
639
  raise ValueError(
696
640
  f"The folder {input} does not contain an ONNX model file."
@@ -839,15 +783,10 @@ class OgaLoad(FirstTool):
839
783
 
840
784
  try:
841
785
  if device == "npu":
842
- saved_env_state = self._setup_npu_environment(
843
- ryzenai_version, oga_path
844
- )
845
786
  # Set USE_AIE_RoPE based on model type
846
787
  os.environ["USE_AIE_RoPE"] = (
847
788
  "0" if "phi-" in checkpoint.lower() else "1"
848
789
  )
849
- elif device == "hybrid":
850
- saved_env_state = None
851
790
 
852
791
  self._load_model_and_setup_state(
853
792
  state, full_model_path, checkpoint, trust_remote_code