PyPI - lemonade-sdk - Versions diffs - 8.1.10__py3-none-any.whl → 8.1.12__py3-none-any.whl - Mend

lemonade-sdk 8.1.10__py3-none-any.whl → 8.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (37) hide show

lemonade/cache.py +6 -1
lemonade/common/status.py +4 -4
lemonade/tools/bench.py +22 -1
lemonade/tools/flm/__init__.py +1 -0
lemonade/tools/flm/utils.py +255 -0
lemonade/tools/llamacpp/bench.py +111 -23
lemonade/tools/llamacpp/load.py +20 -1
lemonade/tools/llamacpp/utils.py +210 -17
lemonade/tools/oga/bench.py +0 -26
lemonade/tools/report/table.py +6 -0
lemonade/tools/server/flm.py +133 -0
lemonade/tools/server/llamacpp.py +23 -5
lemonade/tools/server/serve.py +260 -135
lemonade/tools/server/static/js/chat.js +165 -82
lemonade/tools/server/static/js/models.js +87 -54
lemonade/tools/server/static/js/shared.js +9 -6
lemonade/tools/server/static/logs.html +57 -0
lemonade/tools/server/static/styles.css +159 -8
lemonade/tools/server/static/webapp.html +28 -10
lemonade/tools/server/tray.py +94 -38
lemonade/tools/server/utils/macos_tray.py +226 -0
lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
lemonade/tools/server/webapp.py +4 -1
lemonade/tools/server/wrapped_server.py +91 -25
lemonade/version.py +1 -1
lemonade_install/install.py +25 -2
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/METADATA +10 -6
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/RECORD +37 -32
lemonade_server/cli.py +103 -14
lemonade_server/model_manager.py +186 -45
lemonade_server/pydantic_models.py +25 -1
lemonade_server/server_models.json +175 -62
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.12.dist-info}/top_level.txt +0 -0

lemonade_server/cli.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import argparse
 import sys
 import os
+import platform
 from typing import Tuple, Optional
 import psutil
 from typing import List
@@ -104,12 +105,34 @@ def serve(
         max_wait_time = 30
         wait_interval = 0.5
         waited = 0
-        while waited < max_wait_time:
-            time.sleep(wait_interval)
-            _, running_port = get_server_info()
-            if running_port is not None:
-                break
-            waited += wait_interval
+        if platform.system() == "Darwin":
+            # On macOS, use direct HTTP health check instead of process scanning for better
+            # performance
+            import requests
+            while waited < max_wait_time:
+                time.sleep(wait_interval)
+                try:
+                    response = requests.get(
+                        f"http://{host}:{port}/api/v1/health", timeout=1
+                    )
+                    if response.status_code == 200:
+                        break
+                except (
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.Timeout,
+                ):
+                    pass  # Server not ready yet
+                waited += wait_interval
+        else:
+            # On other platforms, use the existing approach
+            while waited < max_wait_time:
+                time.sleep(wait_interval)
+                _, running_port = get_server_info()
+                if running_port is not None:
+                    break
+                waited += wait_interval
         return port, server_thread
@@ -176,6 +199,7 @@ def pull(
     checkpoint: Optional[str] = None,
     recipe: Optional[str] = None,
     reasoning: bool = False,
+    vision: bool = False,
     mmproj: str = "",
 ):
     """
@@ -202,6 +226,7 @@ def pull(
                 ("checkpoint", checkpoint),
                 ("recipe", recipe),
                 ("reasoning", reasoning),
+                ("vision", vision),
                 ("mmproj", mmproj),
             ]:
                 if value:
@@ -224,6 +249,7 @@ def pull(
             checkpoint=checkpoint,
             recipe=recipe,
             reasoning=reasoning,
+            vision=vision,
             mmproj=mmproj,
             # The pull command will download an upgraded model if available, even
             # if we already have a local copy of the model
@@ -282,6 +308,10 @@ def run(
     import time
     import os
+    # Disable tray on macOS for run command due to threading issues
+    if platform.system() == "Darwin":
+        tray = False
     # Start the server if not running
     _, running_port = get_server_info()
     server_previously_running = running_port is not None
@@ -367,6 +397,23 @@ def is_lemonade_server(pid):
     """
     Check whether or not a given PID corresponds to a Lemonade server
     """
+    # macOS only: Self-exclusion to prevent blocking server startup
+    if platform.system() == "Darwin":
+        current_pid = os.getpid()
+        if pid == current_pid:
+            return False
+        # Exclude children of current process to avoid detecting status commands
+        try:
+            current_process = psutil.Process(current_pid)
+            child_pids = [
+                child.pid for child in current_process.children(recursive=True)
+            ]
+            if pid in child_pids:
+                return False
+        except (psutil.NoSuchProcess, psutil.AccessDenied):
+            pass
     try:
         process = psutil.Process(pid)
@@ -382,6 +429,22 @@ def is_lemonade_server(pid):
                 "lsdev",
             ]:
                 return True
+            # macOS only: Python scripts appear as "python3.x", check command line
+            elif process_name.startswith("python") and platform.system() == "Darwin":
+                try:
+                    cmdline = process.cmdline()
+                    if len(cmdline) >= 2:
+                        script_path = cmdline[1]
+                        # Check for various lemonade server command patterns (macOS only)
+                        lemonade_patterns = [
+                            "lemonade-server-dev",
+                            "lemonade-server",
+                            "lsdev",  # Short alias for lemonade-server-dev
+                        ]
+                        if any(pattern in script_path for pattern in lemonade_patterns):
+                            return True
+                except (psutil.AccessDenied, psutil.NoSuchProcess):
+                    pass
             elif "llama-server" in process_name:
                 return False
             if not process.parent():
@@ -399,18 +462,43 @@ def get_server_info() -> Tuple[int | None, int | None]:
     2. The port that Lemonade Server is running on
     """
-    # Get all network connections and filter for localhost IPv4 listening ports
+    # Try the global approach first (works on Windows/Linux without permissions)
     try:
         connections = psutil.net_connections(kind="tcp4")
         for conn in connections:
             if conn.status == "LISTEN" and conn.laddr and conn.pid is not None:
                 if is_lemonade_server(conn.pid):
                     return conn.pid, conn.laddr.port
-    except Exception:
+    except (psutil.AccessDenied, PermissionError):
+        # Global approach needs elevated permissions on macOS, fall back to per-process approach
+        pass
+    except Exception:  # pylint: disable=broad-exception-caught
         pass
+    # Per-process approach (macOS only - needs this due to permission requirements)
+    if platform.system() == "Darwin":
+        try:
+            for proc in psutil.process_iter(["pid", "name"]):
+                try:
+                    pid = proc.info["pid"]
+                    if is_lemonade_server(pid):
+                        # Found a lemonade server, check its listening ports
+                        connections = proc.net_connections(kind="inet")
+                        for conn in connections:
+                            if conn.status == "LISTEN" and conn.laddr:
+                                return pid, conn.laddr.port
+                        # If no listening connections found, this process is not actually serving
+                        # Continue looking for other processes
+                except (
+                    psutil.NoSuchProcess,
+                    psutil.AccessDenied,
+                    psutil.ZombieProcess,
+                ):
+                    # Some processes may be inaccessible, continue to next
+                    continue
+        except Exception:  # pylint: disable=broad-exception-caught
+            pass
     return None, None
@@ -425,12 +513,13 @@ def list_models():
     # Get all supported models and downloaded models
     supported_models = model_manager.supported_models
+    filtered_models = model_manager.filter_models_by_backend(supported_models)
     downloaded_models = model_manager.downloaded_models
     # Filter to only show recommended models
     recommended_models = {
         model_name: model_info
-        for model_name, model_info in supported_models.items()
+        for model_name, model_info in filtered_models.items()
         if model_info.get("suggested", False)
     }
@@ -507,7 +596,7 @@ def _add_server_arguments(parser):
         "--llamacpp",
         type=str,
         help="LlamaCpp backend to use",
-        choices=["vulkan", "rocm"],
+        choices=["vulkan", "rocm", "metal"],
         default=DEFAULT_LLAMACPP_BACKEND,
     )
     parser.add_argument(
@@ -520,7 +609,7 @@ def _add_server_arguments(parser):
         default=DEFAULT_CTX_SIZE,
     )
-    if os.name == "nt":
+    if os.name == "nt" or platform.system() == "Darwin":
         parser.add_argument(
             "--no-tray",
             action="store_true",
@@ -620,7 +709,7 @@ def main():
     args = parser.parse_args()
-    if os.name != "nt":
+    if os.name != "nt" and platform.system() != "Darwin":
         args.no_tray = True
     if args.version:

lemonade_server/model_manager.py CHANGED Viewed

@@ -1,16 +1,25 @@
 import json
 import os
+import subprocess
 from typing import Optional
 import shutil
 import huggingface_hub
 from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
+from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
 from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
 from lemonade.common.network import custom_snapshot_download
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
+from lemonade.tools.flm.utils import (
+    get_flm_installed_models,
+    is_flm_available,
+    install_flm,
+    download_flm_model,
+)
 class ModelManager:
@@ -81,46 +90,61 @@ class ModelManager:
         """
         downloaded_models = {}
         downloaded_checkpoints = self.downloaded_hf_checkpoints
+        # Get FLM installed model checkpoints
+        flm_installed_checkpoints = get_flm_installed_models()
         for model in self.supported_models:
             model_info = self.supported_models[model]
-            checkpoint = model_info["checkpoint"]
-            base_checkpoint, variant = parse_checkpoint(checkpoint)
-            if base_checkpoint in downloaded_checkpoints:
-                # For GGUF models with variants, verify the specific variant files exist
-                if variant and model_info.get("recipe") == "llamacpp":
-                    try:
-                        from lemonade.tools.llamacpp.utils import identify_gguf_models
-                        from lemonade.common.network import custom_snapshot_download
-                        # Get the local snapshot path
-                        snapshot_path = custom_snapshot_download(
-                            base_checkpoint, local_files_only=True
-                        )
-                        # Check if the specific variant files exist
-                        core_files, sharded_files = identify_gguf_models(
-                            base_checkpoint, variant, model_info.get("mmproj", "")
-                        )
-                        all_variant_files = list(core_files.values()) + sharded_files
-                        # Verify all required files exist locally
-                        all_files_exist = True
-                        for file_path in all_variant_files:
-                            full_file_path = os.path.join(snapshot_path, file_path)
-                            if not os.path.exists(full_file_path):
-                                all_files_exist = False
-                                break
-                        if all_files_exist:
-                            downloaded_models[model] = model_info
-                    except Exception:
-                        # If we can't verify the variant, don't include it
-                        pass
-                else:
-                    # For non-GGUF models or GGUF without variants, use the original logic
+            # Handle FLM models
+            if model_info.get("recipe") == "flm":
+                if model_info["checkpoint"] in flm_installed_checkpoints:
                     downloaded_models[model] = model_info
+            else:
+                # Handle other models
+                checkpoint = model_info["checkpoint"]
+                base_checkpoint, variant = parse_checkpoint(checkpoint)
+                if base_checkpoint in downloaded_checkpoints:
+                    # For GGUF models with variants, verify the specific variant files exist
+                    if variant and model_info.get("recipe") == "llamacpp":
+                        try:
+                            from lemonade.tools.llamacpp.utils import (
+                                identify_gguf_models,
+                            )
+                            from lemonade.common.network import custom_snapshot_download
+                            # Get the local snapshot path
+                            snapshot_path = custom_snapshot_download(
+                                base_checkpoint, local_files_only=True
+                            )
+                            # Check if the specific variant files exist
+                            core_files, sharded_files = identify_gguf_models(
+                                base_checkpoint, variant, model_info.get("mmproj", "")
+                            )
+                            all_variant_files = (
+                                list(core_files.values()) + sharded_files
+                            )
+                            # Verify all required files exist locally
+                            all_files_exist = True
+                            for file_path in all_variant_files:
+                                full_file_path = os.path.join(snapshot_path, file_path)
+                                if not os.path.exists(full_file_path):
+                                    all_files_exist = False
+                                    break
+                            if all_files_exist:
+                                downloaded_models[model] = model_info
+                        except Exception:
+                            # If we can't verify the variant, don't include it
+                            pass
+                    else:
+                        # For non-GGUF models or GGUF without variants, use the original logic
+                        downloaded_models[model] = model_info
         return downloaded_models
     @property
@@ -137,6 +161,7 @@ class ModelManager:
         checkpoint: Optional[str] = None,
         recipe: Optional[str] = None,
         reasoning: bool = False,
+        vision: bool = False,
         mmproj: str = "",
         do_not_upgrade: bool = False,
     ):
@@ -150,6 +175,7 @@ class ModelManager:
             if model not in self.supported_models:
                 # Register the model as a user model if the model name
                 # is not already registered
+                import logging
                 # Ensure the model name includes the `user` namespace
                 model_parsed = model.split(".", 1)
@@ -172,11 +198,17 @@ class ModelManager:
                     )
                 # JSON content that will be used for registration if the download succeeds
+                labels = ["custom"]
+                if reasoning:
+                    labels.append("reasoning")
+                if vision:
+                    labels.append("vision")
                 new_user_model = {
                     "checkpoint": checkpoint,
                     "recipe": recipe,
                     "suggested": True,
-                    "labels": ["custom"] + (["reasoning"] if reasoning else []),
+                    "labels": labels,
                 }
                 if mmproj:
@@ -199,6 +231,7 @@ class ModelManager:
                     checkpoint=checkpoint,
                     recipe=recipe,
                     reasoning=reasoning,
+                    vision=vision,
                 )
             else:
                 # Model is already registered - check if trying to register with different parameters
@@ -207,18 +240,21 @@ class ModelManager:
                 existing_recipe = existing_model.get("recipe")
                 existing_reasoning = "reasoning" in existing_model.get("labels", [])
                 existing_mmproj = existing_model.get("mmproj", "")
+                existing_vision = "vision" in existing_model.get("labels", [])
                 # Compare parameters
                 checkpoint_differs = checkpoint and checkpoint != existing_checkpoint
                 recipe_differs = recipe and recipe != existing_recipe
                 reasoning_differs = reasoning and reasoning != existing_reasoning
                 mmproj_differs = mmproj and mmproj != existing_mmproj
+                vision_differs = vision and vision != existing_vision
                 if (
                     checkpoint_differs
                     or recipe_differs
                     or reasoning_differs
                     or mmproj_differs
+                    or vision_differs
                 ):
                     conflicts = []
                     if checkpoint_differs:
@@ -237,6 +273,10 @@ class ModelManager:
                         conflicts.append(
                             f"mmproj (existing: '{existing_mmproj}', new: '{mmproj}')"
                         )
+                    if vision_differs:
+                        conflicts.append(
+                            f"vision (existing: {existing_vision}, new: {vision})"
+                        )
                     conflict_details = ", ".join(conflicts)
@@ -260,7 +300,34 @@ class ModelManager:
                 gguf_model_config = PullConfig(**self.supported_models[model])
             print(f"Downloading {model} ({checkpoint_to_download})")
-            if "gguf" in checkpoint_to_download.lower():
+            # Handle FLM models
+            current_recipe = (
+                recipe
+                if new_registration_model_config
+                else self.supported_models[model].get("recipe")
+            )
+            if current_recipe == "flm":
+                # Check if FLM is available, and install it if not
+                if not is_flm_available():
+                    print(
+                        "FLM is not installed or not at the minimum required version. Installing FLM..."
+                    )
+                    install_flm()
+                try:
+                    download_flm_model(checkpoint_to_download, None, do_not_upgrade)
+                    print(f"Successfully downloaded FLM model: {model}")
+                except subprocess.CalledProcessError as e:
+                    raise RuntimeError(
+                        f"Failed to download FLM model {model}: {e}"
+                    ) from e
+                except FileNotFoundError as e:
+                    # This shouldn't happen after install_flm(), but just in case
+                    raise RuntimeError(
+                        f"FLM command not found even after installation attempt. "
+                        f"Please manually install FLM using 'lemonade-install --flm'."
+                    ) from e
+            elif "gguf" in checkpoint_to_download.lower():
                 download_gguf(
                     gguf_model_config.checkpoint,
                     gguf_model_config.mmproj,
@@ -292,21 +359,84 @@ class ModelManager:
     def filter_models_by_backend(self, models: dict) -> dict:
         """
         Returns a filtered dict of models that are enabled by the
-        current environment.
+        current environment and platform.
         """
+        import platform
         installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
         hybrid_installed = (
             "onnxruntime-vitisai" in installed_packages
             and "onnxruntime-genai-directml-ryzenai" in installed_packages
         )
+        from lemonade_install.install import (
+            check_ryzen_ai_processor,
+            UnsupportedPlatformError,
+        )
+        try:
+            check_ryzen_ai_processor()
+            ryzenai_npu_available = True
+        except UnsupportedPlatformError:
+            ryzenai_npu_available = False
+        # On macOS, only llamacpp (GGUF) models are supported, and only on Apple Silicon with macOS 14+
+        is_macos = platform.system() == "Darwin"
+        if is_macos:
+            machine = platform.machine().lower()
+            if machine == "x86_64":
+                # Intel Macs are not supported - return empty model list with error info
+                return {
+                    "_unsupported_platform_error": {
+                        "error": "Intel Mac Not Supported",
+                        "message": (
+                            "Lemonade Server requires Apple Silicon processors on macOS. "
+                            "Intel Macs are not currently supported. "
+                            "Please use a Mac with Apple Silicon or try Lemonade on Windows/Linux."
+                        ),
+                        "platform": f"macOS {machine}",
+                        "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                    }
+                }
+            # Check macOS version requirement
+            mac_version = platform.mac_ver()[0]
+            if mac_version:
+                major_version = int(mac_version.split(".")[0])
+                if major_version < 14:
+                    return {
+                        "_unsupported_platform_error": {
+                            "error": "macOS Version Not Supported",
+                            "message": (
+                                f"Lemonade Server requires macOS 14 or later. "
+                                f"Your system is running macOS {mac_version}. "
+                                f"Please update your macOS version to use Lemonade Server."
+                            ),
+                            "platform": f"macOS {mac_version} {machine}",
+                            "supported": "macOS 14+ with Apple Silicon (arm64/aarch64)",
+                        }
+                    }
         filtered = {}
         for model, value in models.items():
-            if value.get("recipe") == "oga-hybrid":
-                if hybrid_installed:
-                    filtered[model] = value
-            else:
-                filtered[model] = value
+            recipe = value.get("recipe")
+            # Filter OGA hybrid models based on package availability
+            if recipe == "oga-hybrid":
+                if not hybrid_installed:
+                    continue
+            if recipe == "flm":
+                if not ryzenai_npu_available:
+                    continue
+            # On macOS, only show llamacpp models (GGUF format)
+            if is_macos and recipe != "llamacpp":
+                continue
+            filtered[model] = value
         return filtered
     def delete_model(self, model_name: str):
@@ -320,9 +450,20 @@ class ModelManager:
                 f"{list(self.supported_models.keys())}"
             )
-        checkpoint = self.supported_models[model_name]["checkpoint"]
+        model_info = self.supported_models[model_name]
+        checkpoint = model_info["checkpoint"]
         print(f"Deleting {model_name} ({checkpoint})")
+        # Handle FLM models
+        if model_info.get("recipe") == "flm":
+            try:
+                command = ["flm", "remove", checkpoint]
+                subprocess.run(command, check=True, encoding="utf-8", errors="replace")
+                print(f"Successfully deleted FLM model: {model_name}")
+                return
+            except subprocess.CalledProcessError as e:
+                raise ValueError(f"Failed to delete FLM model {model_name}: {e}") from e
         # Parse checkpoint to get base and variant
         base_checkpoint, variant = parse_checkpoint(checkpoint)

lemonade_server/pydantic_models.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
+import platform
 from typing import Optional, Union, List
 from pydantic import BaseModel
@@ -6,7 +7,28 @@ from pydantic import BaseModel
 DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
 DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
 DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
-DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
+# Platform-aware default backend selection
+def _get_default_llamacpp_backend():
+    """
+    Get the default llamacpp backend based on the current platform.
+    """
+    # Allow environment variable override
+    env_backend = os.getenv("LEMONADE_LLAMACPP")
+    if env_backend:
+        return env_backend
+    # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
+    if platform.system() == "Darwin" and platform.machine().lower() in [
+        "arm64",
+        "aarch64",
+    ]:
+        return "metal"
+    return "vulkan"
+DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
 DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
     recipe: Optional[str] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates whether the model is a vision model with image processing capabilities
+    vision: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use
     mmproj: Optional[str] = None

lemonade-sdk 8.1.10__py3-none-any.whl → 8.1.12__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.1.10__py3-none-any.whl → 8.1.12__py3-none-any.whl