lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/cache.py CHANGED
@@ -43,7 +43,11 @@ def build_name(input_name):
43
43
  """
44
44
 
45
45
  if os.path.isdir(input_name):
46
+ # Input is a folder so no good way to determine a model name
46
47
  input_name_sanitized = "local_model"
48
+ elif os.path.isfile(input_name):
49
+ # Use the filename without its extension
50
+ input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
47
51
  else:
48
52
  # Sanitize the input name
49
53
  input_name_sanitized = input_name.replace("/", "_")
@@ -63,8 +67,9 @@ class Keys:
63
67
  TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
64
68
  STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
65
69
  SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
66
- PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
67
70
  STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
71
+ PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
72
+ STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
68
73
  CHECKPOINT = "checkpoint"
69
74
  DTYPE = "dtype"
70
75
  PROMPT = "prompt"
lemonade/cli.py CHANGED
@@ -12,6 +12,41 @@ from lemonade.sequence import Sequence
12
12
  from lemonade.tools.management_tools import Cache, Version, SystemInfo
13
13
  from lemonade.state import State
14
14
 
15
+
16
+ def get_available_profilers(warn_missing=False):
17
+ """Get list of available profilers, with conditional imports for optional dependencies.
18
+
19
+ Args:
20
+ warn_missing: If True, print warnings for missing profilers. If False, fail silently.
21
+ """
22
+ profilers = [MemoryTracker]
23
+
24
+ try:
25
+ from lemonade.profilers.hwinfo_power import HWINFOPowerProfiler
26
+
27
+ profilers.append(HWINFOPowerProfiler)
28
+ except ImportError:
29
+ if warn_missing:
30
+ print(
31
+ "Warning: HWINFOPowerProfiler not available. "
32
+ "Install lemonade with dev extras: "
33
+ "pip install lemonade-sdk[dev]"
34
+ )
35
+ try:
36
+ from lemonade.profilers.agt_power import AGTPowerProfiler
37
+
38
+ profilers.append(AGTPowerProfiler)
39
+ except ImportError:
40
+ if warn_missing:
41
+ print(
42
+ "Warning: AGTPowerProfiler not available. "
43
+ "Install lemonade with dev extras: "
44
+ "pip install lemonade-sdk[dev]"
45
+ )
46
+
47
+ return profilers
48
+
49
+
15
50
  from lemonade.tools.huggingface.load import HuggingfaceLoad
16
51
  from lemonade.tools.huggingface.bench import HuggingfaceBench
17
52
  from lemonade.tools.oga.load import OgaLoad
@@ -25,8 +60,6 @@ from lemonade.tools.humaneval import AccuracyHumaneval
25
60
  from lemonade.tools.perplexity import AccuracyPerplexity
26
61
  from lemonade.tools.accuracy import LMEvalHarness
27
62
  from lemonade.tools.prompt import LLMPrompt
28
- from lemonade.tools.quark.quark_load import QuarkLoad
29
- from lemonade.tools.quark.quark_quantize import QuarkQuantize
30
63
  from lemonade.tools.report.llm_report import LemonadeReport
31
64
 
32
65
 
@@ -45,8 +78,6 @@ def main():
45
78
  HuggingfaceBench,
46
79
  OgaLoad,
47
80
  OgaBench,
48
- QuarkQuantize,
49
- QuarkLoad,
50
81
  LemonadeReport,
51
82
  # Inherited from lemonade
52
83
  Cache,
@@ -55,7 +86,7 @@ def main():
55
86
  ]
56
87
 
57
88
  # List the available profilers
58
- profilers = [MemoryTracker]
89
+ profilers = get_available_profilers()
59
90
 
60
91
  # Define the argument parser
61
92
  parser = cli.CustomArgumentParser(
@@ -89,6 +120,17 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
89
120
  parser, tools, cli_name="lemonade"
90
121
  )
91
122
 
123
+ # Check if any profilers are being requested
124
+ requested_profilers = [
125
+ profiler.unique_name.replace("-", "_")
126
+ for profiler in profilers
127
+ if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
128
+ ]
129
+
130
+ # If profilers are requested, get the full list with warnings for missing ones
131
+ if requested_profilers:
132
+ get_available_profilers(warn_missing=True)
133
+
92
134
  profiler_instances = [
93
135
  profiler(global_args[profiler.unique_name.replace("-", "_")])
94
136
  for profiler in profilers
lemonade/common/inference_engines.py CHANGED
@@ -24,7 +24,7 @@ class InferenceEngineDetector:
24
24
  Detect all available inference engines for a specific device type.
25
25
 
26
26
  Args:
27
- device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
27
+ device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
28
28
 
29
29
  Returns:
30
30
  dict: Engine availability information
@@ -223,17 +223,26 @@ class LlamaCppDetector(BaseEngineDetector):
223
223
  """
224
224
  try:
225
225
 
226
- if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
226
+ if device_type not in ["cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu"]:
227
227
  return None
228
228
 
229
229
  # Check if the device is supported by the backend
230
230
  if device_type == "cpu":
231
231
  device_supported = True
232
- elif device_type == "amd_igpu" or device_type == "amd_dgpu":
232
+ elif device_type in ["amd_igpu", "amd_dgpu"]:
233
233
  if backend == "vulkan":
234
234
  device_supported = self._check_vulkan_support()
235
235
  elif backend == "rocm":
236
236
  device_supported = self._check_rocm_support(device_name.lower())
237
+ else:
238
+ device_supported = False
239
+ elif device_type == "nvidia_dgpu":
240
+ if backend == "vulkan":
241
+ device_supported = self._check_vulkan_support()
242
+ else:
243
+ device_supported = False
244
+ else:
245
+ device_supported = False
237
246
  if not device_supported:
238
247
  return {"available": False, "error": f"{backend} not available"}
239
248
 
@@ -390,7 +399,7 @@ def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Di
390
399
  Helper function to detect inference engines for a device type.
391
400
 
392
401
  Args:
393
- device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
402
+ device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
394
403
  device_name: device name
395
404
 
396
405
  Returns:
lemonade/common/status.py CHANGED
@@ -112,10 +112,10 @@ class UniqueInvocationInfo(BasicInfo):
112
112
  if print_file_name:
113
113
  print(f"{self.script_name}{self.extension}:")
114
114
 
115
- # Print invocation about the model (only applies to scripts, not ONNX files or
115
+ # Print invocation about the model (only applies to scripts, not ONNX or GGUF files, nor
116
116
  # LLMs, which have no extension)
117
117
  if not (
118
- self.extension == ".onnx"
118
+ self.extension in [".onnx", ".gguf"]
119
119
  or self.extension == build.state_file_name
120
120
  or self.extension == ""
121
121
  ):
@@ -138,7 +138,7 @@ class UniqueInvocationInfo(BasicInfo):
138
138
 
139
139
  if self.depth == 0:
140
140
  print(f"{self.indent}\tLocation:\t{self.file}", end="")
141
- if self.extension == ".onnx":
141
+ if self.extension in [".onnx", ".gguf"]:
142
142
  print()
143
143
  else:
144
144
  print(f", line {self.line}")
@@ -314,7 +314,7 @@ class UniqueInvocationInfo(BasicInfo):
314
314
  Print information about a given model or submodel.
315
315
  """
316
316
 
317
- if self.extension == ".onnx" or self.extension == "":
317
+ if self.extension in [".onnx", ".gguf"] or self.extension == "":
318
318
  self.indent = "\t" * (2 * self.depth)
319
319
  else:
320
320
  self.indent = "\t" * (2 * self.depth + 1)