PyPI - lemonade-sdk - Versions diffs - 7.0.2__py3-none-any.whl → 7.0.3__py3-none-any.whl - Mend

lemonade-sdk: 7.0.2__py3-none-any.whl → 7.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (18)

lemonade/tools/huggingface_load.py CHANGED Viewed

@@ -326,6 +326,7 @@ class HuggingfaceAdapter(ModelAdapter):
     def generate(
         self,
         input_ids,
+        random_seed=1,
         **kwargs,
     ):
@@ -346,6 +347,11 @@ class HuggingfaceAdapter(ModelAdapter):
             **kwargs,
         }
+        if random_seed is None:
+            torch.random.seed()
+        else:
+            torch.random.manual_seed(random_seed)
         with torch.no_grad(), torch.inference_mode():
             outputs = self.model.generate(input_ids=input_ids, **generation_kwargs)

lemonade/tools/ort_genai/oga.py CHANGED Viewed

@@ -139,6 +139,7 @@ class OrtGenaiModel(ModelAdapter):
         pad_token_id=None,
         stopping_criteria=None,
         max_length=None,
+        random_seed=1,
     ):
         params = og.GeneratorParams(self.model)
@@ -179,6 +180,9 @@ class OrtGenaiModel(ModelAdapter):
         if use_oga_pre_6_api:
             params.input_ids = input_ids
+        if random_seed is None:
+            random_seed = -1  # In og.Generator, -1 = seed with random device
         if self.config and "search" in self.config:
             search_config = self.config["search"]
             params.set_search_options(
@@ -196,10 +200,7 @@ class OrtGenaiModel(ModelAdapter):
                 past_present_share_buffer=search_config.get(
                     "past_present_share_buffer", True
                 ),
-                # Make sure that results do not vary across laptops
-                # by default, random_seed=-1 causes different laptops to give
-                # different results
-                random_seed=1,
+                random_seed=random_seed,
                 # Not currently supported by OGA
                 # diversity_penalty=search_config.get('diversity_penalty', 0.0),
                 # no_repeat_ngram_size=search_config.get('no_repeat_ngram_size', 0),
@@ -212,6 +213,7 @@ class OrtGenaiModel(ModelAdapter):
                 temperature=temperature,
                 max_length=max_length_to_use,
                 min_length=min_length,
+                random_seed=random_seed,
             )
         params.try_graph_capture_with_max_batch_size(1)

lemonade/tools/prompt.py CHANGED Viewed

@@ -15,6 +15,7 @@ DEFAULT_GENERATE_PARAMS = {
     "temperature": 0.7,
 }
+DEFAULT_RANDOM_SEED = 1
 DEFAULT_MAX_NEW_TOKENS = 512
 DEFAULT_N_TRIALS = 1
@@ -108,6 +109,19 @@ class LLMPrompt(Tool):
             f"(useful for testing, default is {DEFAULT_N_TRIALS})",
         )
+        parser.add_argument(
+            "--random-seed",
+            "-r",
+            default=str(DEFAULT_RANDOM_SEED),
+            help="Positive integer seed for random number generator used in "
+            "sampling tokens "
+            f"(default is {DEFAULT_RANDOM_SEED}). If the number of trials is "
+            "greater than one, then the seed is incremented by one for each "
+            "trial. Set to `None` for random, non-repeatable results.  This "
+            "random seed behavior only applies to models loaded with "
+            "`oga-load` or `huggingface-load`.",
+        )
         return parser
     def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
@@ -123,6 +137,11 @@ class LLMPrompt(Tool):
             with open(parsed_args.prompt, "r", encoding="utf-8") as f:
                 parsed_args.prompt = f.read()
+        if parsed_args.random_seed == "None":
+            parsed_args.random_seed = None
+        else:
+            parsed_args.random_seed = int(parsed_args.random_seed)
         return parsed_args
     def run(
@@ -132,6 +151,7 @@ class LLMPrompt(Tool):
         max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
         n_trials: int = DEFAULT_N_TRIALS,
         template: bool = False,
+        random_seed: int = DEFAULT_RANDOM_SEED,
     ) -> State:
         model: ModelAdapter = state.model
@@ -170,9 +190,16 @@ class LLMPrompt(Tool):
             # Get the response from the LLM, which may include the prompt in it
             response = model.generate(
-                input_ids, max_new_tokens=max_new_tokens, **DEFAULT_GENERATE_PARAMS
+                input_ids,
+                max_new_tokens=max_new_tokens,
+                random_seed=random_seed,
+                **DEFAULT_GENERATE_PARAMS,
             )
+            # Increment random seed if not none
+            if random_seed is not None:
+                random_seed += 1
             # Flatten the input and response
             input_ids_array = (
                 input_ids if isinstance(input_ids, (list, str)) else input_ids[0]

lemonade/tools/server/instructions.py CHANGED Viewed

@@ -25,270 +25,13 @@ def get_instructions_html(port=8000):
         f"<script>window.SERVER_MODELS = {json.dumps(filtered_models)};</script>"
     )
-    # New lemon-themed HTML structure
-    # pylint: disable=W1401
-    styled_html = f"""
-    <!DOCTYPE html>
-    <html lang=\"en\">
-    <head>
-        <meta charset=\"UTF-8\">
-        <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">
-        <title>Lemonade Server</title>
-        <link rel="icon" href="data:,">
-        <link rel=\"stylesheet\" href=\"/static/styles.css\">
-        <script>
-        window.SERVER_PORT = {port};
-        </script>
-        {server_models_js}
-    </head>
-    <body>
-        <nav class=\"navbar\">
-            <a href=\"https://github.com/lemonade-sdk/lemonade\">GitHub</a>
-            <a href=\"https://lemonade-server.ai/docs/\">Docs</a>
-            <a href=\"https://lemonade-server.ai/docs/server/server_models/\">Models</a>
-            <a href=\"https://lemonade-server.ai/docs/server/apps/\">Featured Apps</a>
-        </nav>
-        <main class=\"main\">
-            <div class=\"title\">🍋 Lemonade Server</div>
-            <div class=\"tab-container\">
-                <div class=\"tabs\">
-                    <button class=\"tab active\" id=\"tab-chat\" onclick=\"showTab('chat')\">LLM Chat</button>
-                    <button class=\"tab\" id=\"tab-models\" onclick=\"showTab('models')\">Model Management</button>
-                </div>
-                <div class=\"tab-content active\" id=\"content-chat\">
-                    <div class=\"chat-container\">
-                        <div class=\"chat-history\" id=\"chat-history\"></div>
-                        <div class=\"chat-input-row\">
-                            <select id=\"model-select\"></select>
-                            <input type=\"text\" id=\"chat-input\" placeholder=\"Type your message...\" />
-                            <button id=\"send-btn\">Send</button>
-                        </div>
-                    </div>
-                </div>
-                <div class=\"tab-content\" id=\"content-models\">
-                    <div class=\"model-mgmt-container\">
-                        <div class=\"model-mgmt-pane\">
-                            <h3>Installed Models</h3>
-                            <table class=\"model-table\" id=\"installed-models-table\">
-                                <colgroup><col style=\"width:100%\"></colgroup>
-                                <tbody id=\"installed-models-tbody\"></tbody>
-                            </table>
-                        </div>
-                        <div class=\"model-mgmt-pane\">
-                            <h3>Suggested Models</h3>
-                            <table class=\"model-table\" id=\"suggested-models-table\">
-                                <tbody id=\"suggested-models-tbody\"></tbody>
-                            </table>
-                        </div>
-                    </div>
-                </div>
-            </div>
-        </main>
-        <footer class=\"site-footer\">
-            <div class=\"dad-joke\">When life gives you LLMs, make an LLM aide.</div>
-            <div class=\"copyright\">Copyright 2025 AMD</div>
-        </footer>
-        <script src=\"https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js\"></script>
-        <script>
-        // Tab switching logic
-        function showTab(tab) {{
-            document.getElementById('tab-chat').classList.remove('active');
-            document.getElementById('tab-models').classList.remove('active');
-            document.getElementById('content-chat').classList.remove('active');
-            document.getElementById('content-models').classList.remove('active');
-            if (tab === 'chat') {{
-                document.getElementById('tab-chat').classList.add('active');
-                document.getElementById('content-chat').classList.add('active');
-            }} else {{
-                document.getElementById('tab-models').classList.add('active');
-                document.getElementById('content-models').classList.add('active');
-            }}
-        }}
+    # Load HTML template
+    template_path = Path(__file__).parent / "static" / "instructions.html"
+    with open(template_path, "r", encoding="utf-8") as f:
+        html_template = f.read()
-        // Helper to get server base URL
-        function getServerBaseUrl() {{
-            const port = window.SERVER_PORT || 8000;
-            return `http://localhost:{port}`;
-        }}
+    # Replace template variables
+    html_content = html_template.replace("{{SERVER_PORT}}", str(port))
+    html_content = html_content.replace("{{SERVER_MODELS_JS}}", server_models_js)
-        // Populate model dropdown from /api/v1/models endpoint
-        async function loadModels() {{
-            try {{
-                const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
-                const data = await resp.json();
-                const select = document.getElementById('model-select');
-                select.innerHTML = '';
-                if (!data.data || !Array.isArray(data.data)) {{
-                    select.innerHTML = '<option>No models found (malformed response)</option>';
-                    return;
-                }}
-                if (data.data.length === 0) {{
-                    select.innerHTML = '<option>No models available</option>';
-                    return;
-                }}
-                let defaultIndex = 0;
-                data.data.forEach(function(model, index) {{
-                    const modelId = model.id || model.name || model;
-                    const opt = document.createElement('option');
-                    opt.value = modelId;
-                    opt.textContent = modelId;
-                    if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {{
-                        defaultIndex = index;
-                    }}
-                    select.appendChild(opt);
-                }});
-                select.selectedIndex = defaultIndex;
-            }} catch (e) {{
-                const select = document.getElementById('model-select');
-                select.innerHTML = `<option>Error loading models: ${{e.message}}</option>`;
-                console.error('Error loading models:', e);
-            }}
-        }}
-        loadModels();
-        // Model Management Tab Logic
-        async function refreshModelMgmtUI() {{
-            // Get installed models from /api/v1/models
-            let installed = [];
-            try {{
-                const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
-                const data = await resp.json();
-                if (data.data && Array.isArray(data.data)) {{
-                    installed = data.data.map(m => m.id || m.name || m);
-                }}
-            }} catch (e) {{}}
-            // All models from server_models.json (window.SERVER_MODELS)
-            const allModels = window.SERVER_MODELS || {{}};
-            // Filter suggested models not installed
-            const suggested = Object.keys(allModels).filter(
-                k => allModels[k].suggested && !installed.includes(k)
-            );
-            // Render installed models as a table (two columns, second is invisible)
-            const installedTbody = document.getElementById('installed-models-tbody');
-            installedTbody.innerHTML = '';
-            installed.forEach(function(mid) {{
-                var tr = document.createElement('tr');
-                var tdName = document.createElement('td');
-                tdName.textContent = mid;
-                var tdEmpty = document.createElement('td');
-                tdEmpty.style.width = '0';
-                tdEmpty.style.padding = '0';
-                tdEmpty.style.border = 'none';
-                tr.appendChild(tdName);
-                tr.appendChild(tdEmpty);
-                installedTbody.appendChild(tr);
-            }});
-            // Render suggested models as a table
-            const suggestedTbody = document.getElementById('suggested-models-tbody');
-            suggestedTbody.innerHTML = '';
-            suggested.forEach(mid => {{
-                const tr = document.createElement('tr');
-                const tdName = document.createElement('td');
-                tdName.textContent = mid;
-                tdName.style.paddingRight = '1em';
-                tdName.style.verticalAlign = 'middle';
-                const tdBtn = document.createElement('td');
-                tdBtn.style.width = '1%';
-                tdBtn.style.verticalAlign = 'middle';
-                const btn = document.createElement('button');
-                btn.textContent = '+';
-                btn.title = 'Install model';
-                btn.onclick = async function() {{
-                    btn.disabled = true;
-                    btn.textContent = 'Installing...';
-                    btn.classList.add('installing-btn');
-                    try {{
-                        await fetch(getServerBaseUrl() + '/api/v1/pull', {{
-                            method: 'POST',
-                            headers: {{ 'Content-Type': 'application/json' }},
-                            body: JSON.stringify({{ model_name: mid }})
-                        }});
-                        await refreshModelMgmtUI();
-                        await loadModels(); // update chat dropdown too
-                    }} catch (e) {{
-                        btn.textContent = 'Error';
-                    }}
-                }};
-                tdBtn.appendChild(btn);
-                tr.appendChild(tdName);
-                tr.appendChild(tdBtn);
-                suggestedTbody.appendChild(tr);
-            }});
-        }}
-        // Initial load
-        refreshModelMgmtUI();
-        // Optionally, refresh when switching to the tab
-        document.getElementById('tab-models').addEventListener('click', refreshModelMgmtUI);
-        // Chat logic (streaming with OpenAI JS client placeholder)
-        const chatHistory = document.getElementById('chat-history');
-        const chatInput = document.getElementById('chat-input');
-        const sendBtn = document.getElementById('send-btn');
-        const modelSelect = document.getElementById('model-select');
-        let messages = [];
-        function appendMessage(role, text) {{
-            const div = document.createElement('div');
-            div.className = 'chat-message ' + role;
-            // Add a bubble for iMessage style
-            const bubble = document.createElement('div');
-            bubble.className = 'chat-bubble ' + role;
-            bubble.innerHTML = text;
-            div.appendChild(bubble);
-            chatHistory.appendChild(div);
-            chatHistory.scrollTop = chatHistory.scrollHeight;
-        }}
-        async function sendMessage() {{
-            const text = chatInput.value.trim();
-            if (!text) return;
-            appendMessage('user', text);
-            messages.push({{ role: 'user', content: text }});
-            chatInput.value = '';
-            sendBtn.disabled = true;
-            // Streaming OpenAI completions (placeholder, adapt as needed)
-            let llmText = '';
-            appendMessage('llm', '...');
-            const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
-            try {{
-                // Use the correct endpoint for chat completions
-                const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {{
-                    method: 'POST',
-                    headers: {{ 'Content-Type': 'application/json' }},
-                    body: JSON.stringify({{
-                        model: modelSelect.value,
-                        messages: messages,
-                        stream: true
-                    }})
-                }});
-                if (!resp.body) throw new Error('No stream');
-                const reader = resp.body.getReader();
-                let decoder = new TextDecoder();
-                llmDiv.textContent = '';
-                while (true) {{
-                    const {{ done, value }} = await reader.read();
-                    if (done) break;
-                    const chunk = decoder.decode(value);
-                    if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
-                    // Try to extract the content from the OpenAI chunk
-                    const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
-                    if (match && match[1]) {{
-                        llmText += match[1];
-                        llmDiv.textContent = llmText;
-                    }}
-                }}
-                messages.push({{ role: 'assistant', content: llmText }});
-            }} catch (e) {{
-                llmDiv.textContent = '[Error: ' + e.message + ']';
-            }}
-            sendBtn.disabled = false;
-        }}
-        sendBtn.onclick = sendMessage;
-        chatInput.addEventListener('keydown', function(e) {{
-            if (e.key === 'Enter') sendMessage();
-        }});
-        </script>
-    </body>
-    </html>
-    """
-    return HTMLResponse(content=styled_html)
+    return HTMLResponse(content=html_content)

lemonade/tools/server/llamacpp.py CHANGED Viewed

@@ -14,8 +14,8 @@ from fastapi.responses import StreamingResponse
 from openai import OpenAI
+from lemonade_server.pydantic_models import ChatCompletionRequest
 from lemonade_server.model_manager import ModelManager
-from lemonade.tools.server.pydantic_models import ChatCompletionRequest
 from lemonade.tools.server.port_utils import find_free_port
 LLAMA_VERSION = "b5543"
@@ -165,24 +165,25 @@ def _wait_for_load(
 def _launch_llama_subprocess(
-    model_path: str, use_gpu: bool, telemetry: LlamaTelemetry
+    snapshot_files: dict, use_gpu: bool, telemetry: LlamaTelemetry
 ) -> subprocess.Popen:
     """
     Launch llama server subprocess with GPU or CPU configuration
     """
+    # Build the base command
+    base_command = [LLAMA_SERVER_EXE_PATH, "-m", snapshot_files["variant"]]
+    if "mmproj" in snapshot_files:
+        base_command.extend(["--mmproj", snapshot_files["mmproj"]])
+        if not use_gpu:
+            base_command.extend(["--no-mmproj-offload"])
     # Find a port, and save it in the telemetry object for future reference
     # by other functions
     telemetry.choose_port()
-    base_command = [
-        LLAMA_SERVER_EXE_PATH,
-        "-m",
-        model_path,
-        "--port",
-        str(telemetry.port),
-        "--jinja",
-    ]
+    # Add port and jinja to enable tool use
+    base_command.extend(["--port", str(telemetry.port), "--jinja"])
     # Configure GPU layers: 99 for GPU, 0 for CPU-only
     ngl_value = "99" if use_gpu else "0"
@@ -204,7 +205,7 @@ def _launch_llama_subprocess(
     return process
-def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry):
+def server_load(model_config: dict, model_reference: str, telemetry: LlamaTelemetry):
     # Download llama.cpp server if it isn't already available
     if not os.path.exists(LLAMA_SERVER_EXE_DIR):
         # Download llama.cpp server zip
@@ -236,16 +237,15 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
         logging.info("Cleaned up zip file")
     # Download the gguf to the hugging face cache
-    snapshot_path = ModelManager().download_gguf(checkpoint)
-    model_path = os.path.join(snapshot_path, os.listdir(snapshot_path)[0])
-    logging.debug(f"GGUF file path: {model_path}")
+    snapshot_files = ModelManager().download_gguf(model_config)
+    logging.debug(f"GGUF file paths: {snapshot_files}")
     # Start the llama-serve.exe process
     logging.debug(f"Using llama_server for GGUF model: {LLAMA_SERVER_EXE_PATH}")
     # Attempt loading on GPU first
     llama_server_process = _launch_llama_subprocess(
-        model_path, use_gpu=True, telemetry=telemetry
+        snapshot_files, use_gpu=True, telemetry=telemetry
     )
     # Check the /health endpoint until GPU server is ready
@@ -258,7 +258,7 @@ def server_load(checkpoint: str, model_reference: str, telemetry: LlamaTelemetry
     # If loading on GPU failed, try loading on CPU
     if llama_server_process.poll():
         llama_server_process = _launch_llama_subprocess(
-            model_path, use_gpu=False, telemetry=telemetry
+            snapshot_files, use_gpu=False, telemetry=telemetry
         )
         # Check the /health endpoint until CPU server is ready

lemonade/tools/server/serve.py CHANGED Viewed

@@ -46,9 +46,7 @@ from openai.types.responses import (
 import lemonade.api as lemonade_api
 from lemonade_server.model_manager import ModelManager
-from lemonade.tools.management_tools import ManagementTool
-import lemonade.tools.server.llamacpp as llamacpp
-from lemonade.tools.server.pydantic_models import (
+from lemonade_server.pydantic_models import (
     DEFAULT_MAX_NEW_TOKENS,
     LoadConfig,
     CompletionRequest,
@@ -56,6 +54,8 @@ from lemonade.tools.server.pydantic_models import (
     ResponsesRequest,
     PullConfig,
 )
+from lemonade.tools.management_tools import ManagementTool
+import lemonade.tools.server.llamacpp as llamacpp
 from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
 from lemonade.tools.server.instructions import get_instructions_html
 from lemonade.tools.server.port_utils import lifespan
@@ -1200,7 +1200,7 @@ class Server(ManagementTool):
             # We will populate a LoadConfig that has all of the required fields
             config_to_use: LoadConfig
-            # First, validate that the arguments are valid
+            # First, ensure that the arguments are valid
             if config.model_name:
                 # Get the dictionary of supported model from disk
                 supported_models = ModelManager().supported_models
@@ -1293,7 +1293,7 @@ class Server(ManagementTool):
             try:
                 if config_to_use.recipe == "llamacpp":
                     self.llama_server_process = llamacpp.server_load(
-                        checkpoint=config_to_use.checkpoint,
+                        model_config=config_to_use,
                         model_reference=model_reference,
                         telemetry=self.llama_telemetry,
                     )

lemonade/tools/server/static/instructions.html ADDED Viewed

@@ -0,0 +1,262 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Lemonade Server</title>
+    <link rel="icon" href="data:,">
+    <link rel="stylesheet" href="/static/styles.css">
+    <script>
+    window.SERVER_PORT = {{SERVER_PORT}};
+    </script>
+    {{SERVER_MODELS_JS}}
+</head>
+<body>
+    <nav class="navbar">
+        <a href="https://github.com/lemonade-sdk/lemonade">GitHub</a>
+        <a href="https://lemonade-server.ai/docs/">Docs</a>
+        <a href="https://lemonade-server.ai/docs/server/server_models/">Models</a>
+        <a href="https://lemonade-server.ai/docs/server/apps/">Featured Apps</a>
+    </nav>
+    <main class="main">
+        <div class="title">🍋 Lemonade Server</div>
+        <div class="tab-container">
+            <div class="tabs">
+                <button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>
+                <button class="tab" id="tab-models" onclick="showTab('models')">Model Management</button>
+            </div>
+            <div class="tab-content active" id="content-chat">
+                <div class="chat-container">
+                    <div class="chat-history" id="chat-history"></div>
+                    <div class="chat-input-row">
+                        <select id="model-select"></select>
+                        <input type="text" id="chat-input" placeholder="Type your message..." />
+                        <button id="send-btn">Send</button>
+                    </div>
+                </div>
+            </div>
+            <div class="tab-content" id="content-models">
+                <div class="model-mgmt-container">
+                    <div class="model-mgmt-pane">
+                        <h3>Installed Models</h3>
+                        <table class="model-table" id="installed-models-table">
+                            <colgroup><col style="width:100%"></colgroup>
+                            <tbody id="installed-models-tbody"></tbody>
+                        </table>
+                    </div>
+                    <div class="model-mgmt-pane">
+                        <h3>Suggested Models</h3>
+                        <table class="model-table" id="suggested-models-table">
+                            <tbody id="suggested-models-tbody"></tbody>
+                        </table>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </main>
+    <footer class="site-footer">
+        <div class="dad-joke">When life gives you LLMs, make an LLM aide.</div>
+        <div class="copyright">Copyright 2025 AMD</div>
+    </footer>
+    <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
+    <script>
+    // Tab switching logic
+    function showTab(tab) {
+        document.getElementById('tab-chat').classList.remove('active');
+        document.getElementById('tab-models').classList.remove('active');
+        document.getElementById('content-chat').classList.remove('active');
+        document.getElementById('content-models').classList.remove('active');
+        if (tab === 'chat') {
+            document.getElementById('tab-chat').classList.add('active');
+            document.getElementById('content-chat').classList.add('active');
+        } else {
+            document.getElementById('tab-models').classList.add('active');
+            document.getElementById('content-models').classList.add('active');
+        }
+    }
+    // Helper to get server base URL
+    function getServerBaseUrl() {
+        const port = window.SERVER_PORT || 8000;
+        return `http://localhost:${port}`;
+    }
+    // Populate model dropdown from /api/v1/models endpoint
+    async function loadModels() {
+        try {
+            const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
+            const data = await resp.json();
+            const select = document.getElementById('model-select');
+            select.innerHTML = '';
+            if (!data.data || !Array.isArray(data.data)) {
+                select.innerHTML = '<option>No models found (malformed response)</option>';
+                return;
+            }
+            if (data.data.length === 0) {
+                select.innerHTML = '<option>No models available</option>';
+                return;
+            }
+            let defaultIndex = 0;
+            data.data.forEach(function(model, index) {
+                const modelId = model.id || model.name || model;
+                const opt = document.createElement('option');
+                opt.value = modelId;
+                opt.textContent = modelId;
+                if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
+                    defaultIndex = index;
+                }
+                select.appendChild(opt);
+            });
+            select.selectedIndex = defaultIndex;
+        } catch (e) {
+            const select = document.getElementById('model-select');
+            select.innerHTML = `<option>Error loading models: ${e.message}</option>`;
+            console.error('Error loading models:', e);
+        }
+    }
+    loadModels();
+    // Model Management Tab Logic
+    async function refreshModelMgmtUI() {
+        // Get installed models from /api/v1/models
+        let installed = [];
+        try {
+            const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
+            const data = await resp.json();
+            if (data.data && Array.isArray(data.data)) {
+                installed = data.data.map(m => m.id || m.name || m);
+            }
+        } catch (e) {}
+        // All models from server_models.json (window.SERVER_MODELS)
+        const allModels = window.SERVER_MODELS || {};
+        // Filter suggested models not installed
+        const suggested = Object.keys(allModels).filter(
+            k => allModels[k].suggested && !installed.includes(k)
+        );
+        // Render installed models as a table (two columns, second is invisible)
+        const installedTbody = document.getElementById('installed-models-tbody');
+        installedTbody.innerHTML = '';
+        installed.forEach(function(mid) {
+            var tr = document.createElement('tr');
+            var tdName = document.createElement('td');
+            tdName.textContent = mid;
+            var tdEmpty = document.createElement('td');
+            tdEmpty.style.width = '0';
+            tdEmpty.style.padding = '0';
+            tdEmpty.style.border = 'none';
+            tr.appendChild(tdName);
+            tr.appendChild(tdEmpty);
+            installedTbody.appendChild(tr);
+        });
+        // Render suggested models as a table
+        const suggestedTbody = document.getElementById('suggested-models-tbody');
+        suggestedTbody.innerHTML = '';
+        suggested.forEach(mid => {
+            const tr = document.createElement('tr');
+            const tdName = document.createElement('td');
+            tdName.textContent = mid;
+            tdName.style.paddingRight = '1em';
+            tdName.style.verticalAlign = 'middle';
+            const tdBtn = document.createElement('td');
+            tdBtn.style.width = '1%';
+            tdBtn.style.verticalAlign = 'middle';
+            const btn = document.createElement('button');
+            btn.textContent = '+';
+            btn.title = 'Install model';
+            btn.onclick = async function() {
+                btn.disabled = true;
+                btn.textContent = 'Installing...';
+                btn.classList.add('installing-btn');
+                try {
+                    await fetch(getServerBaseUrl() + '/api/v1/pull', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ model_name: mid })
+                    });
+                    await refreshModelMgmtUI();
+                    await loadModels(); // update chat dropdown too
+                } catch (e) {
+                    btn.textContent = 'Error';
+                }
+            };
+            tdBtn.appendChild(btn);
+            tr.appendChild(tdName);
+            tr.appendChild(tdBtn);
+            suggestedTbody.appendChild(tr);
+        });
+    }
+    // Initial load
+    refreshModelMgmtUI();
+    // Optionally, refresh when switching to the tab
+    document.getElementById('tab-models').addEventListener('click', refreshModelMgmtUI);
+    // Chat logic (streaming with OpenAI JS client placeholder)
+    const chatHistory = document.getElementById('chat-history');
+    const chatInput = document.getElementById('chat-input');
+    const sendBtn = document.getElementById('send-btn');
+    const modelSelect = document.getElementById('model-select');
+    let messages = [];
+    function appendMessage(role, text) {
+        const div = document.createElement('div');
+        div.className = 'chat-message ' + role;
+        // Add a bubble for iMessage style
+        const bubble = document.createElement('div');
+        bubble.className = 'chat-bubble ' + role;
+        bubble.innerHTML = text;
+        div.appendChild(bubble);
+        chatHistory.appendChild(div);
+        chatHistory.scrollTop = chatHistory.scrollHeight;
+    }
+    async function sendMessage() {
+        const text = chatInput.value.trim();
+        if (!text) return;
+        appendMessage('user', text);
+        messages.push({ role: 'user', content: text });
+        chatInput.value = '';
+        sendBtn.disabled = true;
+        // Streaming OpenAI completions (placeholder, adapt as needed)
+        let llmText = '';
+        appendMessage('llm', '...');
+        const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
+        try {
+            // Use the correct endpoint for chat completions
+            const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    model: modelSelect.value,
+                    messages: messages,
+                    stream: true
+                })
+            });
+            if (!resp.body) throw new Error('No stream');
+            const reader = resp.body.getReader();
+            let decoder = new TextDecoder();
+            llmDiv.textContent = '';
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done) break;
+                const chunk = decoder.decode(value);
+                if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
+                // Try to extract the content from the OpenAI chunk
+                const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
+                if (match && match[1]) {
+                    llmText += match[1];
+                    llmDiv.textContent = llmText;
+                }
+            }
+            messages.push({ role: 'assistant', content: llmText });
+        } catch (e) {
+            llmDiv.textContent = '[Error: ' + e.message + ']';
+        }
+        sendBtn.disabled = false;
+    }
+    sendBtn.onclick = sendMessage;
+    chatInput.addEventListener('keydown', function(e) {
+        if (e.key === 'Enter') sendMessage();
+    });
+    </script>
+</body>
+</html>

lemonade/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "7.0.2"
+__version__ = "7.0.3"

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 7.0.2
+Version: 7.0.3
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.12

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
 lemonade/cli.py,sha256=ddN2QqsGMsVwydfcR7MSZu1z8_-bUgUP7dhw9lzbHa8,4424
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=iVyoEZ1fyZz5oicAj7ERV3Eld5fVjLM_p365GVSKBpk,22
+lemonade/version.py,sha256=Ur-fY8dgd79WuOM208uDSw5amQiSzM7VmTbWPLQBZvw,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/analyze_model.py,sha256=sYWDznEUEWjx_Qekg7f1hHY4Pfe87IQ77lmsWqePgE0,803
 lemonade/common/build.py,sha256=Pk86mCr6fyBIx2zXDpq0BkdahlCmWRnwSTpShA_gwZw,7849
@@ -27,17 +27,17 @@ lemonade/tools/accuracy.py,sha256=QndammQ1bmlTaF_6YDaaiJp6fpkKZDYGySdQpAgZIp8,11
 lemonade/tools/adapter.py,sha256=4H6gfbjvqyU6qm1_-b2FE-c3a7N9OzEBeDVnIwqRDvg,3014
 lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
 lemonade/tools/huggingface_bench.py,sha256=POE5JYzArK2FBktazOkluLNFzlLctM39B19fK5sMx-0,10017
-lemonade/tools/huggingface_load.py,sha256=i4duS1DTs797savylsR5TxZRHg8Rjhd7Ogtb0fgoWNA,18716
+lemonade/tools/huggingface_load.py,sha256=857GxaQcqmSv2DSsMh503aSicwQDQg5wGGlpwehHHrg,18868
 lemonade/tools/humaneval.py,sha256=RCkVR-yOL56T4EyURaU3MR3yhU4NCbeuWHDyhVWZtxw,9502
 lemonade/tools/llamacpp.py,sha256=uv-xv5KfHm0eU1I6vEKuaRC-QpilE1FffVA-zoCvHt4,8659
 lemonade/tools/llamacpp_bench.py,sha256=tZamG-1Z5pG_bD4O4yz2mUo2AWwEgOw9RSdEDllW4HY,5941
 lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
 lemonade/tools/mmlu.py,sha256=hNa7A8dhpjOtgfd5MGcagpwpw4_AZRZvVj5Duz9LJ88,11381
 lemonade/tools/perplexity.py,sha256=Z6ha7LS5DhdZWHZxhDz8mDnfESbTGc6TGo8KnPjRmiE,5606
-lemonade/tools/prompt.py,sha256=eFm-KsJCzaO_iDaj5JkwZGxceaw0bnhx60ZieJ095k0,7593
+lemonade/tools/prompt.py,sha256=AhRdWpx5BVnuJTmCsxSCw_oKHRlTiRLmOkriXon_mLE,8629
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/ort_genai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/ort_genai/oga.py,sha256=-6y90ivX-LuQK3-ZBSM3llXHtKfBmMxdm7mPvTKSYdU,43883
+lemonade/tools/ort_genai/oga.py,sha256=dZ6kbwHBVfzTujAG0ojYDhjS8uH6kwW5xZTcu20hFIc,43886
 lemonade/tools/ort_genai/oga_bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=QWzhXP8MehgD_KjnsmN5a-3D5kdI2XZtKTH4HoDoFoo,5572
@@ -46,23 +46,24 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
 lemonade/tools/report/table.py,sha256=a0TXo1X84RxCSu0un_XM3ANOlhLtPDuqtGwR7eomf2s,24853
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/instructions.py,sha256=Lvm-tRZaYgHkyt3zQkmMChkXO6rUiLoIAunudmMr_D8,13388
-lemonade/tools/server/llamacpp.py,sha256=R86Q2btI9_EPpPj27vvELnF9KmKxpu3sPIIS1xW3PIA,9997
+lemonade/tools/server/instructions.py,sha256=PbQ8HItagIWbJLYf2IVPhthYVi1E878vNdS42qmTc3E,1230
+lemonade/tools/server/llamacpp.py,sha256=YqUzx-TmyvWMrZfue7xURFfgTRLPGGSzNJtF9GERC_8,10184
 lemonade/tools/server/port_utils.py,sha256=24Ryz5cNU0R9L1kuVSapZoyXTZHzhF4y0Yje9MVOrE0,1535
-lemonade/tools/server/pydantic_models.py,sha256=z1RAs9hkAFkOfMiTPtmUiC3CD2P6OMI2N0J2ztNs0d4,2179
-lemonade/tools/server/serve.py,sha256=3JQa42WZdllKAf_DY-cal0Pc8vdBZd4vwsfhZmpheS8,52500
+lemonade/tools/server/serve.py,sha256=O2ZcM1xogIRAqBE49tQ-gTFpEXExlwHOT3bYL1rZgmc,52483
 lemonade/tools/server/thread_utils.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
+lemonade/tools/server/static/instructions.html,sha256=tCkc55LrI4oWQM2VYuK3_m02MvG5XxIcTbCSgxyTAIU,11257
 lemonade/tools/server/static/styles.css,sha256=8U1EejQaqRLQ6QTCF5UG_dLPtLjRwT1menUHMDhaq2M,5045
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=61qUO7kWCLcdjK0_IQZ46-rKP_AWkyznh4YpDclPKyM,28036
-lemonade_sdk-7.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-7.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_sdk-7.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-7.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
 lemonade_server/cli.py,sha256=DR6sIt66K1sZZG3ascEw_6HUgz3UhU9KGUyzxf4nO_A,7351
-lemonade_server/model_manager.py,sha256=WDGDxrKjq-u2GkGWLNUsRk0d74J-RG2yCYEnH8WMnDw,4010
-lemonade_server/server_models.json,sha256=S_wVpybtBT5xTuM2BLxT83bOsJnPR_yWIl35jy30aJ8,6453
-lemonade_sdk-7.0.2.dist-info/METADATA,sha256=Pf_-kdMDlXVYw_6CHQJDlO3ac4GbHzxENx0Rg8p4QBo,5443
-lemonade_sdk-7.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-7.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-7.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-7.0.2.dist-info/RECORD,,
+lemonade_server/model_manager.py,sha256=-r9JS_fPcoLCQCFKZfkInBIIgT4F1tQ_EIKqMqNYpqM,5546
+lemonade_server/pydantic_models.py,sha256=pdOZW6nAYKWKllMLR7y5wdbIofIznxe5Vehac0Hgqto,2276
+lemonade_server/server_models.json,sha256=3C-lJ2lsNwdy0AKT_US_lcVOoiF3xmadbiOUeOQuJXA,6927
+lemonade_sdk-7.0.3.dist-info/METADATA,sha256=pSSPTu7kUyAh4W8lCVvxS-WAnjMT9Dsyw0r0WHcrxgA,5443
+lemonade_sdk-7.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-7.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-7.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-7.0.3.dist-info/RECORD,,

lemonade_server/model_manager.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import json
 import os
 import huggingface_hub
-import pkg_resources
+from importlib.metadata import distributions
+from lemonade_server.pydantic_models import LoadConfig
 class ModelManager:
@@ -64,16 +65,45 @@ class ModelManager:
         """
         return self.filter_models_by_backend(self.downloaded_models)
-    def download_gguf(self, checkpoint) -> str:
-        # The colon after the checkpoint name indicates which
-        # specific GGUF to download
-        repo_id = checkpoint.split(":")[0]
-        pattern_to_match = f'*{checkpoint.split(":")[1]}.gguf'
-        return huggingface_hub.snapshot_download(
-            repo_id=repo_id,
-            allow_patterns=[pattern_to_match],
+    def download_gguf(self, model_config: LoadConfig) -> dict:
+        """
+        Downloads the GGUF file(s) for the given model configuration.
+        The checkpoint must have the form "<repo_id>:<variant>", where the
+        variant is either:
+        1. A full GGUF filename (e.g. "model-Q4_0.gguf")
+        2. A quantization variant (e.g. "Q4_0")
+        If model_config.mmproj is set, that file is downloaded as well.
+        Returns a dict with a "variant" key (plus an "mmproj" key when
+        applicable) that maps to the path of each file in the snapshot folder.
+        Raises ValueError if the checkpoint is malformed or if an expected
+        file is missing after the download.
+        """
+        try:
+            checkpoint, variant = model_config.checkpoint.split(":")
+        except ValueError as err:
+            raise ValueError(
+                f"GGUF checkpoint {model_config.checkpoint} must have the form "
+                "<repo_id>:<variant>"
+            ) from err
+        # A bare quantization variant is expanded to "<base name>-<variant>.gguf"
+        hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
+        variant_name = (
+            variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
+        )
+        # If there is a mmproj file, add it to the patterns
+        expected_files = {"variant": variant_name}
+        if model_config.mmproj:
+            expected_files["mmproj"] = model_config.mmproj
+        # Download the files
+        snapshot_folder = huggingface_hub.snapshot_download(
+            repo_id=checkpoint,
+            allow_patterns=list(expected_files.values()),
         )
+        # Ensure we downloaded all expected files while creating a dict of the downloaded files
+        snapshot_files = {}
+        for key, filename in expected_files.items():
+            path = os.path.join(snapshot_folder, filename)
+            # Test the joined path itself so files located in subfolders are found too
+            if not os.path.exists(path):
+                raise ValueError(
+                    f"Hugging Face snapshot download for {model_config.checkpoint} "
+                    f"expected file {filename} not found in {snapshot_folder}"
+                )
+            snapshot_files[key] = path
+        # Return a dict that points to the snapshot path of the downloaded GGUF files
+        return snapshot_files
     def download_models(self, models: list[str]):
         """
         Downloads the specified models from Hugging Face.
@@ -88,7 +118,8 @@ class ModelManager:
             print(f"Downloading {model} ({checkpoint})")
             if "gguf" in checkpoint.lower():
-                self.download_gguf(checkpoint)
+                model_config = LoadConfig(**self.supported_models[model])
+                self.download_gguf(model_config)
             else:
                 huggingface_hub.snapshot_download(repo_id=checkpoint)
@@ -97,9 +128,11 @@ class ModelManager:
         Returns a filtered dict of models that are enabled by the
         current environment.
         """
+        installed_packages = {dist.metadata["Name"].lower() for dist in distributions()}
         hybrid_installed = (
-            "onnxruntime-vitisai" in pkg_resources.working_set.by_key
-            and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
+            "onnxruntime-vitisai" in installed_packages
+            and "onnxruntime-genai-directml-ryzenai" in installed_packages
         )
         filtered = {}
         for model, value in models.items():

{lemonade/tools/server → lemonade_server}/pydantic_models.py RENAMED Viewed

@@ -24,6 +24,8 @@ class LoadConfig(BaseModel):
     max_prompt_length: Optional[int] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
+    # Indicates which Multimodal Projector (mmproj) file to use
+    mmproj: Optional[str] = None
 class CompletionRequest(BaseModel):

lemonade_server/server_models.json CHANGED Viewed

@@ -187,5 +187,19 @@
         "recipe": "llamacpp",
         "reasoning": true,
         "suggested": true
+    },
+    "Gemma-3-4b-it-GGUF": {
+        "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+        "mmproj": "mmproj-model-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
+    },
+    "Qwen2.5-VL-7B-Instruct": {
+        "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
+        "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
+        "recipe": "llamacpp",
+        "reasoning": false,
+        "suggested": true
     }
 }

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/NOTICE.md RENAMED Viewed

File without changes

{lemonade_sdk-7.0.2.dist-info → lemonade_sdk-7.0.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

lemonade-sdk 7.0.2__py3-none-any.whl → 7.0.3__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 7.0.2__py3-none-any.whl → 7.0.3__py3-none-any.whl