lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0

lemonade_server/pydantic_models.py

@@ -1,4 +1,5 @@
 import os
+import platform
 from typing import Optional, Union, List

 from pydantic import BaseModel
@@ -6,7 +7,28 @@ from pydantic import BaseModel
 DEFAULT_PORT = int(os.getenv("LEMONADE_PORT", "8000"))
 DEFAULT_HOST = os.getenv("LEMONADE_HOST", "localhost")
 DEFAULT_LOG_LEVEL = os.getenv("LEMONADE_LOG_LEVEL", "info")
-DEFAULT_LLAMACPP_BACKEND = os.getenv("LEMONADE_LLAMACPP", "vulkan")
+
+
+# Platform-aware default backend selection
+def _get_default_llamacpp_backend():
+    """
+    Get the default llamacpp backend based on the current platform.
+    """
+    # Allow environment variable override
+    env_backend = os.getenv("LEMONADE_LLAMACPP")
+    if env_backend:
+        return env_backend
+
+    # Platform-specific defaults: use metal for Apple Silicon, vulkan for everything else
+    if platform.system() == "Darwin" and platform.machine().lower() in [
+        "arm64",
+        "aarch64",
+    ]:
+        return "metal"
+    return "vulkan"
+
+
+DEFAULT_LLAMACPP_BACKEND = _get_default_llamacpp_backend()
 DEFAULT_CTX_SIZE = int(os.getenv("LEMONADE_CTX_SIZE", "4096"))

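One consequence of this change: the default is now computed once, at import time, so LEMONADE_LLAMACPP must be in the environment before lemonade_server.pydantic_models is imported. A minimal standalone sketch of the override behavior (a restatement of the logic above for illustration, not the packaged module):

import os
import platform


def default_backend() -> str:
    """Standalone restatement of _get_default_llamacpp_backend() above."""
    override = os.getenv("LEMONADE_LLAMACPP")
    if override:
        return override
    apple_silicon = platform.system() == "Darwin" and platform.machine().lower() in (
        "arm64",
        "aarch64",
    )
    return "metal" if apple_silicon else "vulkan"


# Without the env var, Apple Silicon resolves to "metal", everything else to "vulkan".
# An explicit value always wins ("rocm" here is a hypothetical override value):
os.environ["LEMONADE_LLAMACPP"] = "rocm"
assert default_backend() == "rocm"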
@@ -23,6 +45,8 @@ class LoadConfig(BaseModel):
     recipe: Optional[str] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates whether the model is a vision model with image processing capabilities
+    vision: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use
     mmproj: Optional[str] = None

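The new vision flag slots in next to reasoning and mmproj. A hedged sketch of a LoadConfig for a vision model, using values from the Gemma-3-4b-it-GGUF catalog entry below; note the checkpoint field is an assumption inferred from PullConfig(LoadConfig) and the server_models.json schema, since it is not shown in this hunk:

from lemonade_server.pydantic_models import LoadConfig

# checkpoint/recipe/mmproj values mirror the Gemma-3-4b-it-GGUF entry in
# server_models.json; the checkpoint field itself is assumed, not shown above.
config = LoadConfig(
    checkpoint="ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
    recipe="llamacpp",
    vision=True,
    mmproj="mmproj-model-f16.gguf",
)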
@@ -46,6 +70,7 @@ class CompletionRequest(BaseModel):
     top_k: int | None = None
     top_p: float | None = None
     max_tokens: int | None = None
+    enable_thinking: bool | None = True


 class ChatCompletionRequest(BaseModel):
@@ -69,6 +94,7 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: int | None = None
     max_completion_tokens: int | None = None
     response_format: dict | None = None
+    enable_thinking: bool | None = True


 class EmbeddingsRequest(BaseModel):
@@ -108,6 +134,7 @@ class ResponsesRequest(BaseModel):
     top_k: int | None = None
     top_p: float | None = None
     stream: bool = False
+    enable_thinking: bool | None = True


 class PullConfig(LoadConfig):
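enable_thinking now defaults to True on completions, chat completions, and responses alike, so reasoning models keep emitting their thinking traces unless a client opts out per request. A sketch of opting out; the /api/v1/chat/completions route and the default port above follow Lemonade Server's OpenAI-compatible convention, but treat the exact URL as an assumption:

import requests

# Opt out of thinking traces for one request (the field defaults to True server-side).
resp = requests.post(
    "http://localhost:8000/api/v1/chat/completions",  # assumed route
    json={
        "model": "Qwen3-0.6B-GGUF",
        "messages": [{"role": "user", "content": "Summarize Vulkan in one line."}],
        "enable_thinking": False,
    },
    timeout=120,
)
print(resp.json()["choices"][0]["message"]["content"])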

lemonade_server/server_models.json

@@ -2,327 +2,481 @@
     "Qwen2.5-0.5B-Instruct-CPU": {
         "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 0.77
     },
     "Llama-3.2-1B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": false
+        "suggested": false,
+        "size": 1.64
     },
     "Llama-3.2-3B-Instruct-CPU": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
         "recipe": "oga-cpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.15
     },
     "Phi-3-Mini-Instruct-CPU": {
         "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 2.23
     },
     "Qwen-1.5-7B-Chat-CPU": {
         "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
         "recipe": "oga-cpu",
-        "suggested": true
+        "suggested": true,
+        "size": 5.89
     },
     "DeepSeek-R1-Distill-Llama-8B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.78
     },
     "DeepSeek-R1-Distill-Qwen-7B-CPU": {
         "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
         "recipe": "oga-cpu",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.78
     },
     "Llama-3.2-1B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 1.89
     },
     "Llama-3.2-3B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Llama-3.2-3B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 4.28
     },
     "Phi-3-Mini-Instruct-Hybrid": {
-        "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Phi-3-mini-4k-instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 4.18
     },
     "Phi-3.5-Mini-Instruct-Hybrid": {
-        "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-hybrid",
        "recipe": "oga-hybrid",
-        "suggested": false
+        "suggested": false,
+        "size": 4.21
     },
     "Qwen-1.5-7B-Chat-Hybrid": {
-        "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen1.5-7B-Chat-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 8.83
     },
     "Qwen-2.5-7B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 8.65
     },
     "Qwen-2.5-3B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-3B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 3.97
     },
     "Qwen-2.5-1.5B-Instruct-Hybrid": {
-        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-uint4-asym-g128-lmhead-g32-fp16-onnx-hybrid",
+        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 2.16
     },
     "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 9.09
     },
     "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
         "max_prompt_length": 2000,
         "suggested": false,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 8.67
     },
     "Mistral-7B-v0.3-Instruct-Hybrid": {
-        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
+        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 7.85
     },
     "Llama-3.1-8B-Instruct-Hybrid": {
-        "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
+        "checkpoint": "amd/Meta-Llama-3.1-8B-Instruct-onnx-ryzenai-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "size": 9.09
+    },
+    "Qwen3-1.7B-Hybrid": {
+        "checkpoint": "amd/Qwen3-1.7B-awq-quant-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 2.55
+    },
+    "Phi-4-Mini-Instruct-Hybrid": {
+        "checkpoint": "amd/Phi-4-mini-instruct-onnx-ryzenai-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "size": 5.46
+    },
+    "Qwen3-4B-Hybrid": {
+        "checkpoint": "amd/Qwen3-4B-awq-quant-onnx-hybrid",
+        "recipe": "oga-hybrid",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 5.17
     },
-    "Llama-xLAM-2-8b-fc-r-Hybrid": {
-        "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
+    "Qwen3-8B-Hybrid": {
+        "checkpoint": "amd/Qwen3-8B-awq-quant-onnx-hybrid",
         "recipe": "oga-hybrid",
-        "suggested": true
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 9.42
     },
     "Qwen-2.5-7B-Instruct-NPU": {
-        "checkpoint": "amd/Qwen2.5-7B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 8.82
     },
-    "Qwen-2.5-1.5B-Instruct-NPU": {
-        "checkpoint": "amd/Qwen2.5-1.5B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+    "Qwen-2.5-3B-Instruct-NPU": {
+        "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 4.09
     },
     "DeepSeek-R1-Distill-Llama-8B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 9.30
     },
     "DeepSeek-R1-Distill-Qwen-7B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 8.87
     },
     "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
-        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
-    },
-    "Llama-3.2-3B-Instruct-NPU": {
-        "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
-        "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.30
     },
     "Llama-3.2-1B-Instruct-NPU": {
-        "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 1.96
     },
     "Mistral-7B-v0.3-Instruct-NPU": {
-        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 8.09
     },
     "Phi-3.5-Mini-Instruct-NPU": {
-        "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": true
+        "suggested": true,
+        "size": 4.35
     },
     "ChatGLM-3-6b-Instruct-NPU": {
-        "checkpoint": "amd/chatglm3-6b-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
-        "recipe": "oga-npu",
-        "suggested": false
-    },
-    "AMD-OLMo-1B-Instruct-NPU": {
-        "checkpoint": "amd/AMD-OLMo-1B-SFT-DPO-awq-g128-int4-asym-bf16-onnx-ryzen-strix",
+        "checkpoint": "amd/chatglm3-6b-onnx-ryzenai-npu",
         "recipe": "oga-npu",
-        "suggested": false
+        "suggested": false,
+        "size": 7.03
     },
     "Llama-3.2-1B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.81
     },
     "Llama-3.2-3B-Instruct-DirectML": {
         "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 6.75
     },
     "Phi-3.5-Mini-Instruct-DirectML": {
         "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 2.14
     },
     "Qwen-1.5-7B-Chat-DirectML": {
         "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.73
     },
     "Mistral-7B-v0.1-Instruct-DirectML": {
         "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 3.67
     },
     "Llama-3-8B-Instruct-DirectML": {
         "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
         "recipe": "oga-igpu",
-        "suggested": false
+        "suggested": false,
+        "size": 4.61
     },
     "Qwen3-0.6B-GGUF": {
         "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 0.38
     },
     "Qwen3-1.7B-GGUF": {
         "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 1.06
     },
     "Qwen3-4B-GGUF": {
         "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 2.38
     },
     "Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.25
     },
     "DeepSeek-Qwen3-8B-GGUF": {
         "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 5.25
     },
     "Qwen3-14B-GGUF": {
         "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 8.54
+    },
+    "Qwen3-4B-Instruct-2507-GGUF": {
+        "checkpoint": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["hot"],
+        "size": 2.5
     },
     "Qwen3-30B-A3B-GGUF": {
         "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning"]
+        "labels": ["reasoning"],
+        "size": 17.4
     },
     "Qwen3-30B-A3B-Instruct-2507-GGUF": {
         "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["hot"]
+        "size": 17.4
     },
     "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
         "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding","hot"]
+        "labels": ["coding","tool-calling","hot"],
+        "size": 18.6
     },
     "Gemma-3-4b-it-GGUF": {
         "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
         "mmproj": "mmproj-model-f16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["hot","vision"],
+        "size": 3.61
     },
     "Qwen2.5-VL-7B-Instruct-GGUF": {
         "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
         "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 4.68
     },
     "Llama-4-Scout-17B-16E-Instruct-GGUF": {
         "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
         "mmproj": "mmproj-F16.gguf",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 61.5
     },
     "Cogito-v2-llama-109B-MoE-GGUF": {
         "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
         "mmproj": "mmproj-F16.gguf",
         "recipe": "llamacpp",
         "suggested": false,
-        "labels": ["vision"]
+        "labels": ["vision"],
+        "size": 65.3
     },
     "nomic-embed-text-v1-GGUF": {
         "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["embeddings"]
+        "labels": ["embeddings"],
+        "size": 0.0781
     },
     "nomic-embed-text-v2-moe-GGUF": {
         "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["embeddings"]
+        "labels": ["embeddings"],
+        "size": 0.51
     },
     "bge-reranker-v2-m3-GGUF": {
         "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reranking"]
+        "labels": ["reranking"],
+        "size": 0.53
     },
     "jina-reranker-v1-tiny-en-GGUF": {
         "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
         "recipe": "llamacpp",
         "suggested": false,
-        "labels": ["reranking"]
+        "labels": ["reranking"],
+        "size": 0.03
     },
     "Devstral-Small-2507-GGUF":{
         "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding"]
+        "labels": ["coding","tool-calling"],
+        "size": 14.3
     },
     "Qwen2.5-Coder-32B-Instruct-GGUF": {
         "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["coding"]
+        "labels": ["coding"],
+        "size": 19.85
     },
     "gpt-oss-120b-GGUF": {
         "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
         "recipe": "llamacpp",
-        "suggested": true,
-        "labels": ["hot", "reasoning"]
+        "suggested": false,
+        "labels": ["reasoning", "tool-calling"],
+        "size": 62.7
     },
     "gpt-oss-20b-GGUF": {
         "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
         "recipe": "llamacpp",
+        "suggested": false,
+        "labels": ["reasoning", "tool-calling"],
+        "size": 11.6
+    },
+    "gpt-oss-120b-mxfp-GGUF": {
+        "checkpoint": "ggml-org/gpt-oss-120b-GGUF:*",
+        "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["hot", "reasoning"]
+        "labels": ["hot", "reasoning", "tool-calling"],
+        "size": 63.3
+    },
+    "gpt-oss-20b-mxfp4-GGUF": {
+        "checkpoint": "ggml-org/gpt-oss-20b-GGUF",
+        "recipe": "llamacpp",
+        "suggested": true,
+        "labels": ["hot", "reasoning", "tool-calling"],
+        "size": 12.1
     },
     "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
         "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
         "recipe": "llamacpp",
         "suggested": true,
-        "labels": ["reasoning","hot"]
+        "labels": ["reasoning"],
+        "size": 73.1
+    },
+    "Playable1-GGUF": {
+        "checkpoint": "playable/Playable1-GGUF:Playable1-q4_k_m.gguf",
+        "recipe": "llamacpp",
+        "suggested": false,
+        "labels": ["coding"],
+        "size": 4.68
+    },
+    "gpt-oss-20b-FLM": {
+        "checkpoint": "gpt-oss:20b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 13.4
+    },
+    "Gemma3-1b-it-FLM": {
+        "checkpoint": "gemma3:1b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 1.17
+    },
+    "Gemma3-4b-it-FLM": {
+        "checkpoint": "gemma3:4b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["hot","vision"],
+        "size": 5.26
+    },
+    "Qwen3-0.6b-FLM": {
+        "checkpoint": "qwen3:0.6b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 0.66
+    },
+    "Qwen3-4B-Instruct-2507-FLM": {
+        "checkpoint": "qwen3-it:4b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["hot"],
+        "size": 3.07
+    },
+    "Qwen3-8b-FLM": {
+        "checkpoint": "qwen3:8b",
+        "recipe": "flm",
+        "suggested": true,
+        "labels": ["reasoning"],
+        "size": 5.57
+    },
+    "Llama-3.1-8B-FLM": {
+        "checkpoint": "llama3.1:8b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 5.36
+    },
+    "Llama-3.2-1B-FLM": {
+        "checkpoint": "llama3.2:1b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 1.21
+    },
+    "Llama-3.2-3B-FLM": {
+        "checkpoint": "llama3.2:3b",
+        "recipe": "flm",
+        "suggested": true,
+        "size": 2.62
     }
 }
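Every catalog entry now carries a size field (download size, apparently in GB) alongside the existing checkpoint/recipe/suggested/labels keys, and a new "flm" recipe appears for checkpoints like gpt-oss:20b. A small sketch of consuming the updated schema, assuming a local copy of the file:

import json

# server_models.json ships inside the wheel under lemonade_server/;
# the local path used here is illustrative.
with open("server_models.json", encoding="utf-8") as f:
    catalog = json.load(f)

# Print the suggested models for the new "flm" recipe with their sizes in GB.
for name, entry in catalog.items():
    if entry.get("suggested") and entry.get("recipe") == "flm":
        print(f"{name}: {entry['size']} GB, checkpoint {entry['checkpoint']}")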