PyPI - lemonade-sdk - Versions diffs - 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl - Mend

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53) hide show

lemonade/cache.py +6 -1
lemonade/cli.py +47 -5
lemonade/common/inference_engines.py +13 -4
lemonade/common/status.py +4 -4
lemonade/common/system_info.py +544 -1
lemonade/profilers/agt_power.py +437 -0
lemonade/profilers/hwinfo_power.py +429 -0
lemonade/tools/accuracy.py +143 -48
lemonade/tools/adapter.py +6 -1
lemonade/tools/bench.py +26 -8
lemonade/tools/flm/__init__.py +1 -0
lemonade/tools/flm/utils.py +303 -0
lemonade/tools/huggingface/bench.py +6 -1
lemonade/tools/llamacpp/bench.py +146 -27
lemonade/tools/llamacpp/load.py +30 -2
lemonade/tools/llamacpp/utils.py +393 -33
lemonade/tools/oga/bench.py +5 -26
lemonade/tools/oga/load.py +60 -121
lemonade/tools/oga/migration.py +403 -0
lemonade/tools/report/table.py +76 -8
lemonade/tools/server/flm.py +133 -0
lemonade/tools/server/llamacpp.py +220 -553
lemonade/tools/server/serve.py +684 -168
lemonade/tools/server/static/js/chat.js +666 -342
lemonade/tools/server/static/js/model-settings.js +24 -3
lemonade/tools/server/static/js/models.js +597 -73
lemonade/tools/server/static/js/shared.js +79 -14
lemonade/tools/server/static/logs.html +191 -0
lemonade/tools/server/static/styles.css +491 -66
lemonade/tools/server/static/webapp.html +83 -31
lemonade/tools/server/tray.py +158 -38
lemonade/tools/server/utils/macos_tray.py +226 -0
lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
lemonade/tools/server/webapp.py +4 -1
lemonade/tools/server/wrapped_server.py +559 -0
lemonade/version.py +1 -1
lemonade_install/install.py +54 -611
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
lemonade_server/cli.py +145 -37
lemonade_server/model_manager.py +521 -37
lemonade_server/pydantic_models.py +28 -1
lemonade_server/server_models.json +246 -92
lemonade_server/settings.py +39 -39
lemonade/tools/quark/__init__.py +0 -0
lemonade/tools/quark/quark_load.py +0 -173
lemonade/tools/quark/quark_quantize.py +0 -439
lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0

lemonade/cache.py CHANGED Viewed

@@ -43,7 +43,11 @@ def build_name(input_name):
     """
     if os.path.isdir(input_name):
+        # Input is a folder so no good way to determine a model name
         input_name_sanitized = "local_model"
+    elif os.path.isfile(input_name):
+        # Use the filename without its extension
+        input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
     else:
         # Sanitize the input name
         input_name_sanitized = input_name.replace("/", "_")
@@ -63,8 +67,9 @@ class Keys:
     TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
     STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
     SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
-    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
     STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
+    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
+    STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
     CHECKPOINT = "checkpoint"
     DTYPE = "dtype"
     PROMPT = "prompt"

lemonade/cli.py CHANGED Viewed

@@ -12,6 +12,41 @@ from lemonade.sequence import Sequence
 from lemonade.tools.management_tools import Cache, Version, SystemInfo
 from lemonade.state import State
+def get_available_profilers(warn_missing=False):
+    """Get list of available profilers, with conditional imports for optional dependencies.
+    Args:
+        warn_missing: If True, print warnings for missing profilers. If False, fail silently.
+    """
+    profilers = [MemoryTracker]
+    try:
+        from lemonade.profilers.hwinfo_power import HWINFOPowerProfiler
+        profilers.append(HWINFOPowerProfiler)
+    except ImportError:
+        if warn_missing:
+            print(
+                "Warning: HWINFOPowerProfiler not available. "
+                "Install lemonade with dev extras: "
+                "pip install lemonade-sdk[dev]"
+            )
+    try:
+        from lemonade.profilers.agt_power import AGTPowerProfiler
+        profilers.append(AGTPowerProfiler)
+    except ImportError:
+        if warn_missing:
+            print(
+                "Warning: AGTPowerProfiler not available. "
+                "Install lemonade with dev extras: "
+                "pip install lemonade-sdk[dev]"
+            )
+    return profilers
 from lemonade.tools.huggingface.load import HuggingfaceLoad
 from lemonade.tools.huggingface.bench import HuggingfaceBench
 from lemonade.tools.oga.load import OgaLoad
@@ -25,8 +60,6 @@ from lemonade.tools.humaneval import AccuracyHumaneval
 from lemonade.tools.perplexity import AccuracyPerplexity
 from lemonade.tools.accuracy import LMEvalHarness
 from lemonade.tools.prompt import LLMPrompt
-from lemonade.tools.quark.quark_load import QuarkLoad
-from lemonade.tools.quark.quark_quantize import QuarkQuantize
 from lemonade.tools.report.llm_report import LemonadeReport
@@ -45,8 +78,6 @@ def main():
         HuggingfaceBench,
         OgaLoad,
         OgaBench,
-        QuarkQuantize,
-        QuarkLoad,
         LemonadeReport,
         # Inherited from lemonade
         Cache,
@@ -55,7 +86,7 @@ def main():
     ]
     # List the available profilers
-    profilers = [MemoryTracker]
+    profilers = get_available_profilers()
     # Define the argument parser
     parser = cli.CustomArgumentParser(
@@ -89,6 +120,17 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
         parser, tools, cli_name="lemonade"
     )
+    # Check if any profilers are being requested
+    requested_profilers = [
+        profiler.unique_name.replace("-", "_")
+        for profiler in profilers
+        if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
+    ]
+    # If profilers are requested, get the full list with warnings for missing ones
+    if requested_profilers:
+        get_available_profilers(warn_missing=True)
     profiler_instances = [
         profiler(global_args[profiler.unique_name.replace("-", "_")])
         for profiler in profilers

lemonade/common/inference_engines.py CHANGED Viewed

@@ -24,7 +24,7 @@ class InferenceEngineDetector:
         Detect all available inference engines for a specific device type.
         Args:
-            device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+            device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
         Returns:
             dict: Engine availability information
@@ -223,17 +223,26 @@ class LlamaCppDetector(BaseEngineDetector):
         """
         try:
-            if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
+            if device_type not in ["cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu"]:
                 return None
             # Check if the device is supported by the backend
             if device_type == "cpu":
                 device_supported = True
-            elif device_type == "amd_igpu" or device_type == "amd_dgpu":
+            elif device_type in ["amd_igpu", "amd_dgpu"]:
                 if backend == "vulkan":
                     device_supported = self._check_vulkan_support()
                 elif backend == "rocm":
                     device_supported = self._check_rocm_support(device_name.lower())
+                else:
+                    device_supported = False
+            elif device_type == "nvidia_dgpu":
+                if backend == "vulkan":
+                    device_supported = self._check_vulkan_support()
+                else:
+                    device_supported = False
+            else:
+                device_supported = False
             if not device_supported:
                 return {"available": False, "error": f"{backend} not available"}
@@ -390,7 +399,7 @@ def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Di
     Helper function to detect inference engines for a device type.
     Args:
-        device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
+        device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
         device_name: device name
     Returns:

lemonade/common/status.py CHANGED Viewed

@@ -112,10 +112,10 @@ class UniqueInvocationInfo(BasicInfo):
         if print_file_name:
             print(f"{self.script_name}{self.extension}:")
-        # Print invocation about the model (only applies to scripts, not ONNX files or
+        # Print invocation about the model (only applies to scripts, not ONNX or GGUF files, nor
         # LLMs, which have no extension)
         if not (
-            self.extension == ".onnx"
+            self.extension in [".onnx", ".gguf"]
             or self.extension == build.state_file_name
             or self.extension == ""
         ):
@@ -138,7 +138,7 @@ class UniqueInvocationInfo(BasicInfo):
         if self.depth == 0:
             print(f"{self.indent}\tLocation:\t{self.file}", end="")
-            if self.extension == ".onnx":
+            if self.extension in [".onnx", ".gguf"]:
                 print()
             else:
                 print(f", line {self.line}")
@@ -314,7 +314,7 @@ class UniqueInvocationInfo(BasicInfo):
         Print information about a given model or submodel.
         """
-        if self.extension == ".onnx" or self.extension == "":
+        if self.extension in [".onnx", ".gguf"] or self.extension == "":
             self.indent = "\t" * (2 * self.depth)
         else:
             self.indent = "\t" * (2 * self.depth + 1)

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl