lemonade-sdk 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
@@ -0,0 +1,643 @@
1
+ {
2
+ "Qwen2.5-0.5B-Instruct-CPU": {
3
+ "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
4
+ "recipe": "oga-cpu",
5
+ "suggested": true,
6
+ "size": 0.77
7
+ },
8
+ "Llama-3.2-1B-Instruct-CPU": {
9
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
10
+ "recipe": "oga-cpu",
11
+ "suggested": false,
12
+ "size": 1.64
13
+ },
14
+ "Llama-3.2-3B-Instruct-CPU": {
15
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
16
+ "recipe": "oga-cpu",
17
+ "suggested": false,
18
+ "size": 3.15
19
+ },
20
+ "Phi-3-Mini-Instruct-CPU": {
21
+ "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
22
+ "recipe": "oga-cpu",
23
+ "suggested": true,
24
+ "size": 2.23
25
+ },
26
+ "Qwen-1.5-7B-Chat-CPU": {
27
+ "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
28
+ "recipe": "oga-cpu",
29
+ "suggested": true,
30
+ "size": 5.89
31
+ },
32
+ "DeepSeek-R1-Distill-Llama-8B-CPU": {
33
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
34
+ "recipe": "oga-cpu",
35
+ "suggested": true,
36
+ "labels": ["reasoning"],
37
+ "size": 5.78
38
+ },
39
+ "DeepSeek-R1-Distill-Qwen-7B-CPU": {
40
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
41
+ "recipe": "oga-cpu",
42
+ "suggested": true,
43
+ "labels": ["reasoning"],
44
+ "size": 5.78
45
+ },
46
+ "Llama-3.2-1B-Instruct-Hybrid": {
47
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-hybrid",
48
+ "recipe": "oga-hybrid",
49
+ "suggested": true,
50
+ "size": 1.89
51
+ },
52
+ "Llama-3.2-3B-Instruct-Hybrid": {
53
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-onnx-ryzenai-hybrid",
54
+ "recipe": "oga-hybrid",
55
+ "suggested": true,
56
+ "size": 4.28
57
+ },
58
+ "Phi-3-Mini-Instruct-Hybrid": {
59
+ "checkpoint": "amd/Phi-3-mini-4k-instruct-onnx-ryzenai-hybrid",
60
+ "recipe": "oga-hybrid",
61
+ "suggested": true,
62
+ "size": 4.18
63
+ },
64
+ "Phi-3.5-Mini-Instruct-Hybrid": {
65
+ "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-hybrid",
66
+ "recipe": "oga-hybrid",
67
+ "suggested": false,
68
+ "size": 4.21
69
+ },
70
+ "Qwen-1.5-7B-Chat-Hybrid": {
71
+ "checkpoint": "amd/Qwen1.5-7B-Chat-onnx-ryzenai-hybrid",
72
+ "recipe": "oga-hybrid",
73
+ "suggested": true,
74
+ "size": 8.83
75
+ },
76
+ "Qwen-2.5-7B-Instruct-Hybrid": {
77
+ "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-hybrid",
78
+ "recipe": "oga-hybrid",
79
+ "suggested": true,
80
+ "size": 8.65
81
+ },
82
+ "Qwen-2.5-3B-Instruct-Hybrid": {
83
+ "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-hybrid",
84
+ "recipe": "oga-hybrid",
85
+ "suggested": true,
86
+ "size": 3.97
87
+ },
88
+ "Qwen-2.5-1.5B-Instruct-Hybrid": {
89
+ "checkpoint": "amd/Qwen2.5-1.5B-Instruct-onnx-ryzenai-hybrid",
90
+ "recipe": "oga-hybrid",
91
+ "suggested": true,
92
+ "size": 2.16
93
+ },
94
+ "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
95
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-hybrid",
96
+ "recipe": "oga-hybrid",
97
+ "suggested": true,
98
+ "labels": ["reasoning"],
99
+ "size": 9.09
100
+ },
101
+ "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
102
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-hybrid",
103
+ "recipe": "oga-hybrid",
104
+ "max_prompt_length": 2000,
105
+ "suggested": false,
106
+ "labels": ["reasoning"],
107
+ "size": 8.67
108
+ },
109
+ "Mistral-7B-v0.3-Instruct-Hybrid": {
110
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-hybrid",
111
+ "recipe": "oga-hybrid",
112
+ "suggested": true,
113
+ "size": 7.85
114
+ },
115
+ "Llama-3.1-8B-Instruct-Hybrid": {
116
+ "checkpoint": "amd/Meta-Llama-3.1-8B-Instruct-onnx-ryzenai-hybrid",
117
+ "recipe": "oga-hybrid",
118
+ "suggested": true,
119
+ "size": 9.09
120
+ },
121
+ "Qwen3-1.7B-Hybrid": {
122
+ "checkpoint": "amd/Qwen3-1.7B-awq-quant-onnx-hybrid",
123
+ "recipe": "oga-hybrid",
124
+ "suggested": true,
125
+ "labels": ["reasoning"],
126
+ "size": 2.55
127
+ },
128
+ "Phi-4-Mini-Instruct-Hybrid": {
129
+ "checkpoint": "amd/Phi-4-mini-instruct-onnx-ryzenai-hybrid",
130
+ "recipe": "oga-hybrid",
131
+ "suggested": true,
132
+ "size": 5.46
133
+ },
134
+ "Qwen3-4B-Hybrid": {
135
+ "checkpoint": "amd/Qwen3-4B-awq-quant-onnx-hybrid",
136
+ "recipe": "oga-hybrid",
137
+ "suggested": true,
138
+ "labels": ["reasoning"],
139
+ "size": 5.17
140
+ },
141
+ "Qwen3-8B-Hybrid": {
142
+ "checkpoint": "amd/Qwen3-8B-awq-quant-onnx-hybrid",
143
+ "recipe": "oga-hybrid",
144
+ "suggested": true,
145
+ "labels": ["reasoning"],
146
+ "size": 9.42
147
+ },
148
+ "Qwen-2.5-7B-Instruct-NPU": {
149
+ "checkpoint": "amd/Qwen2.5-7B-Instruct-onnx-ryzenai-npu",
150
+ "recipe": "oga-npu",
151
+ "suggested": true,
152
+ "size": 8.82
153
+ },
154
+ "Qwen-2.5-3B-Instruct-NPU": {
155
+ "checkpoint": "amd/Qwen2.5-3B-Instruct-onnx-ryzenai-npu",
156
+ "recipe": "oga-npu",
157
+ "suggested": true,
158
+ "size": 4.09
159
+ },
160
+ "DeepSeek-R1-Distill-Llama-8B-NPU": {
161
+ "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-onnx-ryzenai-npu",
162
+ "recipe": "oga-npu",
163
+ "suggested": true,
164
+ "size": 9.30
165
+ },
166
+ "DeepSeek-R1-Distill-Qwen-7B-NPU": {
167
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-onnx-ryzenai-npu",
168
+ "recipe": "oga-npu",
169
+ "suggested": false,
170
+ "size": 8.87
171
+ },
172
+ "DeepSeek-R1-Distill-Qwen-1.5B-NPU": {
173
+ "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-1.5B-onnx-ryzenai-npu",
174
+ "recipe": "oga-npu",
175
+ "suggested": false,
176
+ "size": 2.30
177
+ },
178
+ "Llama-3.2-1B-Instruct-NPU": {
179
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-onnx-ryzenai-npu",
180
+ "recipe": "oga-npu",
181
+ "suggested": false,
182
+ "size": 1.96
183
+ },
184
+ "Mistral-7B-v0.3-Instruct-NPU": {
185
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.3-onnx-ryzenai-npu",
186
+ "recipe": "oga-npu",
187
+ "suggested": true,
188
+ "size": 8.09
189
+ },
190
+ "Phi-3.5-Mini-Instruct-NPU": {
191
+ "checkpoint": "amd/Phi-3.5-mini-instruct-onnx-ryzenai-npu",
192
+ "recipe": "oga-npu",
193
+ "suggested": true,
194
+ "size": 4.35
195
+ },
196
+ "ChatGLM-3-6b-Instruct-NPU": {
197
+ "checkpoint": "amd/chatglm3-6b-onnx-ryzenai-npu",
198
+ "recipe": "oga-npu",
199
+ "suggested": false,
200
+ "size": 7.03
201
+ },
202
+ "Llama-3.2-1B-Instruct-DirectML": {
203
+ "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
204
+ "recipe": "oga-igpu",
205
+ "suggested": false,
206
+ "size": 2.81
207
+ },
208
+ "Llama-3.2-3B-Instruct-DirectML": {
209
+ "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
210
+ "recipe": "oga-igpu",
211
+ "suggested": false,
212
+ "size": 6.75
213
+ },
214
+ "Phi-3.5-Mini-Instruct-DirectML": {
215
+ "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
216
+ "recipe": "oga-igpu",
217
+ "suggested": false,
218
+ "size": 2.14
219
+ },
220
+ "Qwen-1.5-7B-Chat-DirectML": {
221
+ "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
222
+ "recipe": "oga-igpu",
223
+ "suggested": false,
224
+ "size": 3.73
225
+ },
226
+ "Mistral-7B-v0.1-Instruct-DirectML": {
227
+ "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
228
+ "recipe": "oga-igpu",
229
+ "suggested": false,
230
+ "size": 3.67
231
+ },
232
+ "Llama-3-8B-Instruct-DirectML": {
233
+ "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
234
+ "recipe": "oga-igpu",
235
+ "suggested": false,
236
+ "size": 4.61
237
+ },
238
+ "Qwen3-0.6B-GGUF": {
239
+ "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
240
+ "recipe": "llamacpp",
241
+ "suggested": true,
242
+ "labels": ["reasoning"],
243
+ "size": 0.38
244
+ },
245
+ "Qwen3-1.7B-GGUF": {
246
+ "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
247
+ "recipe": "llamacpp",
248
+ "suggested": true,
249
+ "labels": ["reasoning"],
250
+ "size": 1.06
251
+ },
252
+ "Qwen3-4B-GGUF": {
253
+ "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
254
+ "recipe": "llamacpp",
255
+ "suggested": true,
256
+ "labels": ["reasoning"],
257
+ "size": 2.38
258
+ },
259
+ "Qwen3-8B-GGUF": {
260
+ "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
261
+ "recipe": "llamacpp",
262
+ "suggested": true,
263
+ "labels": ["reasoning"],
264
+ "size": 5.25
265
+ },
266
+ "DeepSeek-Qwen3-8B-GGUF": {
267
+ "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
268
+ "recipe": "llamacpp",
269
+ "suggested": true,
270
+ "labels": ["reasoning"],
271
+ "size": 5.25
272
+ },
273
+ "Qwen3-14B-GGUF": {
274
+ "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
275
+ "recipe": "llamacpp",
276
+ "suggested": true,
277
+ "labels": ["reasoning"],
278
+ "size": 8.54
279
+ },
280
+ "Qwen3-4B-Instruct-2507-GGUF": {
281
+ "checkpoint": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q4_K_M.gguf",
282
+ "recipe": "llamacpp",
283
+ "suggested": true,
284
+ "labels": ["hot"],
285
+ "size": 2.5
286
+ },
287
+ "Qwen3-30B-A3B-GGUF": {
288
+ "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
289
+ "recipe": "llamacpp",
290
+ "suggested": true,
291
+ "labels": ["reasoning"],
292
+ "size": 17.4
293
+ },
294
+ "Qwen3-30B-A3B-Instruct-2507-GGUF": {
295
+ "checkpoint": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Qwen3-30B-A3B-Instruct-2507-Q4_0.gguf",
296
+ "recipe": "llamacpp",
297
+ "suggested": true,
298
+ "size": 17.4
299
+ },
300
+ "Qwen3-Coder-30B-A3B-Instruct-GGUF": {
301
+ "checkpoint": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf",
302
+ "recipe": "llamacpp",
303
+ "suggested": true,
304
+ "labels": ["coding","tool-calling","hot"],
305
+ "size": 18.6
306
+ },
307
+ "Gemma-3-4b-it-GGUF": {
308
+ "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
309
+ "mmproj": "mmproj-model-f16.gguf",
310
+ "recipe": "llamacpp",
311
+ "suggested": true,
312
+ "labels": ["hot","vision"],
313
+ "size": 3.61
314
+ },
315
+ "Phi-4-mini-instruct-GGUF": {
316
+ "checkpoint": "unsloth/Phi-4-mini-instruct-GGUF:Phi-4-mini-instruct-Q4_K_M.gguf",
317
+ "recipe": "llamacpp",
318
+ "suggested": true,
319
+ "size": 2.49
320
+ },
321
+ "LFM2-1.2B-GGUF": {
322
+ "checkpoint": "LiquidAI/LFM2-1.2B-GGUF:LFM2-1.2B-Q4_K_M.gguf",
323
+ "recipe": "llamacpp",
324
+ "suggested": true,
325
+ "size": 0.731
326
+ },
327
+ "Jan-nano-128k-GGUF": {
328
+ "checkpoint": "Menlo/Jan-nano-128k-gguf:jan-nano-128k-Q4_K_M.gguf",
329
+ "recipe": "llamacpp",
330
+ "suggested": true,
331
+ "size": 2.5
332
+ },
333
+ "Jan-v1-4B-GGUF": {
334
+ "checkpoint": "janhq/Jan-v1-4B-GGUF:Jan-v1-4B-Q4_K_M.gguf",
335
+ "recipe": "llamacpp",
336
+ "suggested": true,
337
+ "size": 2.5
338
+ },
339
+ "Llama-3.2-1B-Instruct-GGUF": {
340
+ "checkpoint": "unsloth/Llama-3.2-1B-Instruct-GGUF:Llama-3.2-1B-Instruct-UD-Q4_K_XL.gguf",
341
+ "recipe": "llamacpp",
342
+ "suggested": true,
343
+ "size": 0.834
344
+ },
345
+ "Llama-3.2-3B-Instruct-GGUF": {
346
+ "checkpoint": "unsloth/Llama-3.2-3B-Instruct-GGUF:Llama-3.2-3B-Instruct-UD-Q4_K_XL.gguf",
347
+ "recipe": "llamacpp",
348
+ "suggested": true,
349
+ "size": 2.06
350
+ },
351
+ "SmolLM3-3B-GGUF": {
352
+ "checkpoint": "unsloth/SmolLM3-3B-128K-GGUF:SmolLM3-3B-128K-UD-Q4_K_XL.gguf",
353
+ "recipe": "llamacpp",
354
+ "suggested": true,
355
+ "size": 1.94
356
+ },
357
+ "Ministral-3-3B-Instruct-2512-GGUF": {
358
+ "checkpoint": "mistralai/Ministral-3-3B-Instruct-2512-GGUF:Ministral-3-3B-Instruct-2512-Q4_K_M.gguf",
359
+ "mmproj": "Ministral-3-3B-Instruct-2512-BF16-mmproj.gguf",
360
+ "recipe": "llamacpp",
361
+ "suggested": true,
362
+ "labels": ["vision"],
363
+ "size": 2.85
364
+ },
365
+ "Qwen2.5-VL-7B-Instruct-GGUF": {
366
+ "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
367
+ "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
368
+ "recipe": "llamacpp",
369
+ "suggested": true,
370
+ "labels": ["vision"],
371
+ "size": 4.68
372
+ },
373
+ "Qwen3-VL-4B-Instruct-GGUF": {
374
+ "checkpoint": "Qwen/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M",
375
+ "mmproj": "mmproj-Qwen3VL-4B-Instruct-F16.gguf",
376
+ "recipe": "llamacpp",
377
+ "suggested": true,
378
+ "labels": ["vision"],
379
+ "size": 3.33
380
+ },
381
+ "Qwen3-VL-8B-Instruct-GGUF": {
382
+ "checkpoint": "Qwen/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M",
383
+ "mmproj": "mmproj-Qwen3VL-8B-Instruct-F16.gguf",
384
+ "recipe": "llamacpp",
385
+ "suggested": true,
386
+ "labels": ["vision"],
387
+ "size": 6.19
388
+ },
389
+ "Qwen3-Next-80B-A3B-Instruct-GGUF": {
390
+ "checkpoint": "unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Qwen3-Next-80B-A3B-Instruct-UD-Q4_K_XL.gguf",
391
+ "recipe": "llamacpp",
392
+ "suggested": true,
393
+ "labels": ["hot"],
394
+ "size": 45.1
395
+ },
396
+ "Llama-4-Scout-17B-16E-Instruct-GGUF": {
397
+ "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
398
+ "mmproj": "mmproj-F16.gguf",
399
+ "recipe": "llamacpp",
400
+ "suggested": true,
401
+ "labels": ["vision"],
402
+ "size": 61.5
403
+ },
404
+ "Cogito-v2-llama-109B-MoE-GGUF": {
405
+ "checkpoint": "unsloth/cogito-v2-preview-llama-109B-MoE-GGUF:Q4_K_M",
406
+ "mmproj": "mmproj-F16.gguf",
407
+ "recipe": "llamacpp",
408
+ "suggested": false,
409
+ "labels": ["vision"],
410
+ "size": 65.3
411
+ },
412
+ "nomic-embed-text-v1-GGUF": {
413
+ "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
414
+ "recipe": "llamacpp",
415
+ "suggested": true,
416
+ "labels": ["embeddings"],
417
+ "size": 0.0781
418
+ },
419
+ "nomic-embed-text-v2-moe-GGUF": {
420
+ "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
421
+ "recipe": "llamacpp",
422
+ "suggested": true,
423
+ "labels": ["embeddings"],
424
+ "size": 0.51
425
+ },
426
+ "Qwen3-Embedding-0.6B-GGUF": {
427
+ "checkpoint": "Qwen/Qwen3-Embedding-0.6B-GGUF:Qwen3-Embedding-0.6B-Q8_0.gguf",
428
+ "recipe": "llamacpp",
429
+ "suggested": true,
430
+ "labels": ["embeddings"],
431
+ "size": 0.64
432
+ },
433
+ "Qwen3-Embedding-4B-GGUF": {
434
+ "checkpoint": "Qwen/Qwen3-Embedding-4B-GGUF:Qwen3-Embedding-4B-Q8_0.gguf",
435
+ "recipe": "llamacpp",
436
+ "suggested": true,
437
+ "labels": ["embeddings"],
438
+ "size": 4.28
439
+ },
440
+ "Qwen3-Embedding-8B-GGUF": {
441
+ "checkpoint": "Qwen/Qwen3-Embedding-8B-GGUF:Qwen3-Embedding-8B-Q8_0.gguf",
442
+ "recipe": "llamacpp",
443
+ "suggested": true,
444
+ "labels": ["embeddings"],
445
+ "size": 8.05
446
+ },
447
+ "bge-reranker-v2-m3-GGUF": {
448
+ "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
449
+ "recipe": "llamacpp",
450
+ "suggested": true,
451
+ "labels": ["reranking"],
452
+ "size": 0.53
453
+ },
454
+ "jina-reranker-v1-tiny-en-GGUF": {
455
+ "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
456
+ "recipe": "llamacpp",
457
+ "suggested": false,
458
+ "labels": ["reranking"],
459
+ "size": 0.03
460
+ },
461
+ "Devstral-Small-2507-GGUF": {
462
+ "checkpoint": "mistralai/Devstral-Small-2507_gguf:Q4_K_M",
463
+ "recipe": "llamacpp",
464
+ "suggested": true,
465
+ "labels": ["coding","tool-calling"],
466
+ "size": 14.3
467
+ },
468
+ "Qwen2.5-Coder-32B-Instruct-GGUF": {
469
+ "checkpoint": "Qwen/Qwen2.5-Coder-32B-Instruct-GGUF:Q4_K_M",
470
+ "recipe": "llamacpp",
471
+ "suggested": true,
472
+ "labels": ["coding"],
473
+ "size": 19.85
474
+ },
475
+ "gpt-oss-120b-GGUF": {
476
+ "checkpoint": "unsloth/gpt-oss-120b-GGUF:Q4_K_M",
477
+ "recipe": "llamacpp",
478
+ "suggested": false,
479
+ "labels": ["reasoning", "tool-calling"],
480
+ "size": 62.7
481
+ },
482
+ "gpt-oss-20b-GGUF": {
483
+ "checkpoint": "unsloth/gpt-oss-20b-GGUF:Q4_K_M",
484
+ "recipe": "llamacpp",
485
+ "suggested": false,
486
+ "labels": ["reasoning", "tool-calling"],
487
+ "size": 11.6
488
+ },
489
+ "gpt-oss-120b-mxfp-GGUF": {
490
+ "checkpoint": "ggml-org/gpt-oss-120b-GGUF:*",
491
+ "recipe": "llamacpp",
492
+ "suggested": true,
493
+ "labels": ["hot", "reasoning", "tool-calling"],
494
+ "size": 63.3
495
+ },
496
+ "gpt-oss-20b-mxfp4-GGUF": {
497
+ "checkpoint": "ggml-org/gpt-oss-20b-GGUF",
498
+ "recipe": "llamacpp",
499
+ "suggested": true,
500
+ "labels": ["hot", "reasoning", "tool-calling"],
501
+ "size": 12.1
502
+ },
503
+ "GLM-4.5-Air-UD-Q4K-XL-GGUF": {
504
+ "checkpoint": "unsloth/GLM-4.5-Air-GGUF:UD-Q4_K_XL",
505
+ "recipe": "llamacpp",
506
+ "suggested": true,
507
+ "labels": ["reasoning"],
508
+ "size": 73.1
509
+ },
510
+ "Playable1-GGUF": {
511
+ "checkpoint": "playable/Playable1-GGUF:Playable1-q4_k_m.gguf",
512
+ "recipe": "llamacpp",
513
+ "suggested": false,
514
+ "labels": ["coding"],
515
+ "size": 4.68
516
+ },
517
+ "granite-4.0-h-tiny-GGUF": {
518
+ "checkpoint": "unsloth/granite-4.0-h-tiny-GGUF:Q4_K_M",
519
+ "recipe": "llamacpp",
520
+ "suggested": true,
521
+ "labels": ["tool-calling"],
522
+ "size": 4.25
523
+ },
524
+ "LFM2-8B-A1B-GGUF": {
525
+ "checkpoint": "LiquidAI/LFM2-8B-A1B-GGUF:Q4_K_M",
526
+ "recipe": "llamacpp",
527
+ "suggested": true,
528
+ "size": 4.8
529
+ },
530
+ "gpt-oss-20b-FLM": {
531
+ "checkpoint": "gpt-oss:20b",
532
+ "recipe": "flm",
533
+ "suggested": true,
534
+ "labels": ["reasoning"],
535
+ "size": 13.4
536
+ },
537
+ "Gemma3-1b-it-FLM": {
538
+ "checkpoint": "gemma3:1b",
539
+ "recipe": "flm",
540
+ "suggested": true,
541
+ "size": 1.17
542
+ },
543
+ "Gemma3-4b-it-FLM": {
544
+ "checkpoint": "gemma3:4b",
545
+ "recipe": "flm",
546
+ "suggested": true,
547
+ "labels": ["hot","vision"],
548
+ "size": 5.26
549
+ },
550
+ "Qwen3-4B-VL-FLM": {
551
+ "checkpoint": "qwen3vl-it:4b",
552
+ "recipe": "flm",
553
+ "suggested": true,
554
+ "labels": ["hot","vision"],
555
+ "size": 3.85
556
+ },
557
+ "Qwen3-0.6b-FLM": {
558
+ "checkpoint": "qwen3:0.6b",
559
+ "recipe": "flm",
560
+ "suggested": true,
561
+ "labels": ["reasoning"],
562
+ "size": 0.66
563
+ },
564
+ "Qwen3-4B-Instruct-2507-FLM": {
565
+ "checkpoint": "qwen3-it:4b",
566
+ "recipe": "flm",
567
+ "suggested": true,
568
+ "size": 3.07
569
+ },
570
+ "Qwen3-8b-FLM": {
571
+ "checkpoint": "qwen3:8b",
572
+ "recipe": "flm",
573
+ "suggested": true,
574
+ "labels": ["reasoning"],
575
+ "size": 5.57
576
+ },
577
+ "Llama-3.1-8B-FLM": {
578
+ "checkpoint": "llama3.1:8b",
579
+ "recipe": "flm",
580
+ "suggested": true,
581
+ "size": 5.36
582
+ },
583
+ "Llama-3.2-1B-FLM": {
584
+ "checkpoint": "llama3.2:1b",
585
+ "recipe": "flm",
586
+ "suggested": true,
587
+ "size": 1.21
588
+ },
589
+ "Llama-3.2-3B-FLM": {
590
+ "checkpoint": "llama3.2:3b",
591
+ "recipe": "flm",
592
+ "suggested": true,
593
+ "size": 2.62
594
+ },
595
+ "LFM2-1.2B-FLM": {
596
+ "checkpoint": "lfm2:1.2b",
597
+ "recipe": "flm",
598
+ "suggested": true,
599
+ "size": 0.96
600
+ },
601
+ "Whisper-Tiny": {
602
+ "checkpoint": "ggerganov/whisper.cpp:ggml-tiny.bin",
603
+ "recipe": "whispercpp",
604
+ "suggested": true,
605
+ "labels": ["audio", "transcription"],
606
+ "size": 0.075
607
+ },
608
+ "Whisper-Base": {
609
+ "checkpoint": "ggerganov/whisper.cpp:ggml-base.bin",
610
+ "recipe": "whispercpp",
611
+ "suggested": true,
612
+ "labels": ["audio", "transcription"],
613
+ "size": 0.142
614
+ },
615
+ "Whisper-Small": {
616
+ "checkpoint": "ggerganov/whisper.cpp:ggml-small.bin",
617
+ "recipe": "whispercpp",
618
+ "suggested": true,
619
+ "labels": ["audio", "transcription"],
620
+ "size": 0.466
621
+ },
622
+ "Whisper-Medium": {
623
+ "checkpoint": "ggerganov/whisper.cpp:ggml-medium.bin",
624
+ "recipe": "whispercpp",
625
+ "suggested": true,
626
+ "labels": ["audio", "transcription"],
627
+ "size": 1.42
628
+ },
629
+ "Whisper-Large-v3": {
630
+ "checkpoint": "ggerganov/whisper.cpp:ggml-large-v3.bin",
631
+ "recipe": "whispercpp",
632
+ "suggested": true,
633
+ "labels": ["audio", "transcription"],
634
+ "size": 2.87
635
+ },
636
+ "Whisper-Large-v3-Turbo": {
637
+ "checkpoint": "ggerganov/whisper.cpp:ggml-large-v3-turbo.bin",
638
+ "recipe": "whispercpp",
639
+ "suggested": true,
640
+ "labels": ["audio", "transcription", "hot"],
641
+ "size": 1.55
642
+ }
643
+ }
@@ -0,0 +1,39 @@
1
+ import json
2
+ import os
3
+ from lemonade.cache import DEFAULT_CACHE_DIR
4
+
5
+ # Define the path for the user settings file, placing it in the cache directory
6
+ USER_SETTINGS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_settings.json")
7
+
8
+
9
+ def save_setting(key, value):
10
+ """Save a setting to the user_settings.json file."""
11
+ # Ensure the cache directory exists
12
+ os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)
13
+
14
+ settings = {}
15
+ if os.path.exists(USER_SETTINGS_FILE):
16
+ with open(USER_SETTINGS_FILE, "r") as f:
17
+ try:
18
+ settings = json.load(f)
19
+ except json.JSONDecodeError:
20
+ # If the file is empty or corrupt, start with a fresh dictionary
21
+ pass
22
+
23
+ settings[key] = value
24
+ with open(USER_SETTINGS_FILE, "w") as f:
25
+ json.dump(settings, f, indent=4)
26
+
27
+
28
+ def load_setting(key, default=None):
29
+ """Load a setting from the user_settings.json file."""
30
+ if not os.path.exists(USER_SETTINGS_FILE):
31
+ return default
32
+
33
+ with open(USER_SETTINGS_FILE, "r") as f:
34
+ try:
35
+ settings = json.load(f)
36
+ return settings.get(key, default)
37
+ except json.JSONDecodeError:
38
+ # Return default if the file is empty or corrupt
39
+ return default