PyPI - lemonade-sdk - Versions diffs - 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl - Mend

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (21)

lemonade/tools/humaneval.py +1 -1
lemonade/tools/mmlu.py +1 -1
lemonade/tools/oga/load.py +1 -1
lemonade/tools/perplexity.py +2 -2
lemonade/tools/quark/quark_load.py +1 -1
lemonade/tools/quark/quark_quantize.py +2 -2
lemonade/tools/server/llamacpp.py +130 -9
lemonade/tools/server/serve.py +73 -0
lemonade/tools/server/static/styles.css +424 -4
lemonade/tools/server/static/webapp.html +301 -35
lemonade/version.py +1 -1
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/METADATA +5 -12
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD +21 -21
lemonade_server/model_manager.py +12 -2
lemonade_server/pydantic_models.py +25 -1
lemonade_server/server_models.json +46 -44
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/top_level.txt +0 -0

lemonade/tools/server/static/webapp.html CHANGED Viewed

@@ -33,7 +33,47 @@
                         <input type="text" id="chat-input" placeholder="Type your message..." />
                         <button id="send-btn">Send</button>
                     </div>
-                </div>
+                </div>
+                <!-- App Suggestions Section -->
+                <div class="app-suggestions-section">
+                    <div class="suggestion-text">
+                        Use Lemonade with your favorite app
+                    </div>
+                    <div class="app-logos-grid">
+                        <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" target="_blank" class="app-logo-item" title="Open WebUI">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" class="app-logo-img">
+                            <span class="app-name">Open WebUI</span>
+                        </a>
+                        <a href="https://lemonade-server.ai/docs/server/apps/continue/" target="_blank" class="app-logo-item" title="Continue">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" class="app-logo-img">
+                            <span class="app-name">Continue</span>
+                        </a>
+                        <a href="https://github.com/amd/gaia" target="_blank" class="app-logo-item" title="Gaia">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" class="app-logo-img">
+                            <span class="app-name">Gaia</span>
+                        </a>
+                        <a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" target="_blank" class="app-logo-item" title="AnythingLLM">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" class="app-logo-img">
+                            <span class="app-name">AnythingLLM</span>
+                        </a>
+                        <a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" target="_blank" class="app-logo-item" title="AI Dev Gallery">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" class="app-logo-img">
+                            <span class="app-name">AI Dev Gallery</span>
+                        </a>
+                        <a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" target="_blank" class="app-logo-item" title="LM-Eval">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
+                            <span class="app-name">LM-Eval</span>
+                        </a>
+                        <a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" target="_blank" class="app-logo-item" title="CodeGPT">
+                            <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" class="app-logo-img">
+                            <span class="app-name">CodeGPT</span>
+                        </a>
+                    <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
+                        <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
+                        <span class="app-name">AI Toolkit</span>
+                    </a>
+                    </div>
+                </div>
             </div>
             <div class="tab-content" id="content-models">                <div class="model-mgmt-register-form collapsed">                    <h3 class="model-mgmt-form-title" onclick="toggleAddModelForm()">
                         Add a Model
@@ -109,7 +149,66 @@
         <div class="copyright">Copyright 2025 AMD</div>
     </footer>
     <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
-    <script>    // Tab switching logic
+    <script src="https://cdn.jsdelivr.net/npm/marked@9.1.0/marked.min.js"></script>
+    <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
+    <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+    <script>
+    // Configure MathJax
+    window.MathJax = {
+        tex: {
+            inlineMath: [['\\(', '\\)'], ['$', '$']],
+            displayMath: [['\\[', '\\]'], ['$$', '$$']],
+            processEscapes: true,
+            processEnvironments: true
+        },
+        options: {
+            skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+        }
+    };
+    </script>
+    <script>
+    // Configure marked.js for safe HTML rendering
+    marked.setOptions({
+        breaks: true,
+        gfm: true,
+        sanitize: false,
+        smartLists: true,
+        smartypants: true
+    });
+    // Function to unescape JSON strings
+    function unescapeJsonString(str) {
+        try {
+            return str.replace(/\\n/g, '\n')
+                     .replace(/\\t/g, '\t')
+                     .replace(/\\r/g, '\r')
+                     .replace(/\\"/g, '"')
+                     .replace(/\\\\/g, '\\');
+        } catch (error) {
+            console.error('Error unescaping string:', error);
+            return str;
+        }
+    }
+    // Function to safely render markdown with MathJax support
+    function renderMarkdown(text) {
+        try {
+            const html = marked.parse(text);
+            // Trigger MathJax to process the new content
+            if (window.MathJax && window.MathJax.typesetPromise) {
+                // Use a timeout to ensure DOM is updated before typesetting
+                setTimeout(() => {
+                    window.MathJax.typesetPromise();
+                }, 0);
+            }
+            return html;
+        } catch (error) {
+            console.error('Error rendering markdown:', error);
+            return text; // fallback to plain text
+        }
+    }
+    // Tab switching logic
     function showTab(tab, updateHash = true) {
         document.getElementById('tab-chat').classList.remove('active');
         document.getElementById('tab-models').classList.remove('active');
@@ -163,6 +262,44 @@
         form.classList.toggle('collapsed');
     }
+    // Handle image load failures for app logos
+    function handleImageFailure(img) {
+        const logoItem = img.closest('.app-logo-item');
+        if (logoItem) {
+            logoItem.classList.add('image-failed');
+        }
+    }
+    // Set up image error handlers when DOM is loaded
+    document.addEventListener('DOMContentLoaded', function() {
+        const logoImages = document.querySelectorAll('.app-logo-img');
+        logoImages.forEach(function(img) {
+            let imageLoaded = false;
+            img.addEventListener('load', function() {
+                imageLoaded = true;
+            });
+            img.addEventListener('error', function() {
+                if (!imageLoaded) {
+                    handleImageFailure(this);
+                }
+            });
+            // Also check if image is already broken (cached failure)
+            if (img.complete && img.naturalWidth === 0) {
+                handleImageFailure(img);
+            }
+            // Timeout fallback for slow connections (5 seconds)
+            setTimeout(function() {
+                if (!imageLoaded && !img.complete) {
+                    handleImageFailure(img);
+                }
+            }, 5000);
+        });
+    });
     // Helper to get server base URL
     function getServerBaseUrl() {
         const port = window.SERVER_PORT || 8000;
@@ -184,17 +321,37 @@
                 select.innerHTML = '<option>No models available</option>';
                 return;
             }
+            // Filter out embedding models from chat interface
+            const allModels = window.SERVER_MODELS || {};
+            let filteredModels = [];
             let defaultIndex = 0;
-            data.data.forEach(function(model, index) {
+            data.data.forEach(function(model) {
                 const modelId = model.id || model.name || model;
+                const modelInfo = allModels[modelId] || {};
+                const labels = modelInfo.labels || [];
+                // Skip models with "embeddings" or "reranking" label
+                if (labels.includes('embeddings') || labels.includes('reranking')) {
+                    return;
+                }
+                filteredModels.push(modelId);
                 const opt = document.createElement('option');
                 opt.value = modelId;
                 opt.textContent = modelId;
                 if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
-                    defaultIndex = index;
+                    defaultIndex = filteredModels.length - 1;
                 }
                 select.appendChild(opt);
             });
+            if (filteredModels.length === 0) {
+                select.innerHTML = '<option>No chat models available</option>';
+                return;
+            }
             select.selectedIndex = defaultIndex;
         } catch (e) {
             const select = document.getElementById('model-select');
@@ -217,26 +374,24 @@
         // Add labels if they exist
         const modelData = allModels[modelId];
-        if (modelData) {
-            // Add reasoning label if reasoning is true
-            if (modelData.reasoning === true) {
-                const reasoningLabel = document.createElement('span');
-                reasoningLabel.className = 'model-label reasoning';
-                reasoningLabel.textContent = 'reasoning';
-                container.appendChild(reasoningLabel);
-            }
-            // Add other labels if they exist
-            if (modelData.labels && Array.isArray(modelData.labels)) {
-                modelData.labels.forEach(label => {
-                    const labelSpan = document.createElement('span');
-                    const labelLower = label.toLowerCase();
-                    const labelClass = (labelLower === 'vision') ? 'vision' : 'other';
-                    labelSpan.className = `model-label ${labelClass}`;
-                    labelSpan.textContent = label;
-                    container.appendChild(labelSpan);
-                });
-            }
+        if (modelData && modelData.labels && Array.isArray(modelData.labels)) {
+            modelData.labels.forEach(label => {
+                const labelSpan = document.createElement('span');
+                const labelLower = label.toLowerCase();
+                let labelClass = 'other';
+                if (labelLower === 'vision') {
+                    labelClass = 'vision';
+                } else if (labelLower === 'embeddings') {
+                    labelClass = 'embeddings';
+                } else if (labelLower === 'reasoning') {
+                    labelClass = 'reasoning';
+                } else if (labelLower === 'reranking') {
+                    labelClass = 'reranking';
+                }
+                labelSpan.className = `model-label ${labelClass}`;
+                labelSpan.textContent = label;
+                container.appendChild(labelSpan);
+            });
         }
         return container;
@@ -358,16 +513,110 @@
     const modelSelect = document.getElementById('model-select');
     let messages = [];
-    function appendMessage(role, text) {
+    function appendMessage(role, text, isMarkdown = false) {
         const div = document.createElement('div');
         div.className = 'chat-message ' + role;
         // Add a bubble for iMessage style
         const bubble = document.createElement('div');
         bubble.className = 'chat-bubble ' + role;
-        bubble.innerHTML = text;
+        if (role === 'llm' && isMarkdown) {
+            bubble.innerHTML = renderMarkdownWithThinkTokens(text);
+        } else {
+            bubble.textContent = text;
+        }
         div.appendChild(bubble);
         chatHistory.appendChild(div);
         chatHistory.scrollTop = chatHistory.scrollHeight;
+        return bubble; // Return the bubble element for streaming updates
+    }
+    function updateMessageContent(bubbleElement, text, isMarkdown = false) {
+        if (isMarkdown) {
+            bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
+        } else {
+            bubbleElement.textContent = text;
+        }
+    }
+    function renderMarkdownWithThinkTokens(text) {
+        // Check if text contains opening think tag
+        if (text.includes('<think>')) {
+            if (text.includes('</think>')) {
+                // Complete think block - handle as before
+                const thinkMatch = text.match(/<think>(.*?)<\/think>/s);
+                if (thinkMatch) {
+                    const thinkContent = thinkMatch[1].trim();
+                    const mainResponse = text.replace(/<think>.*?<\/think>/s, '').trim();
+                    // Create collapsible structure
+                    let html = '';
+                    if (thinkContent) {
+                        html += `
+                            <div class="think-tokens-container">
+                                <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
+                                    <span class="think-tokens-chevron">▼</span>
+                                    <span class="think-tokens-label">Thinking...</span>
+                                </div>
+                                <div class="think-tokens-content">
+                                    ${renderMarkdown(thinkContent)}
+                                </div>
+                            </div>
+                        `;
+                    }
+                    if (mainResponse) {
+                        html += `<div class="main-response">${renderMarkdown(mainResponse)}</div>`;
+                    }
+                    return html;
+                }
+            } else {
+                // Partial think block - only opening tag found, still being generated
+                const thinkMatch = text.match(/<think>(.*)/s);
+                if (thinkMatch) {
+                    const thinkContent = thinkMatch[1];
+                    const beforeThink = text.substring(0, text.indexOf('<think>'));
+                    let html = '';
+                    if (beforeThink.trim()) {
+                        html += `<div class="main-response">${renderMarkdown(beforeThink)}</div>`;
+                    }
+                    html += `
+                        <div class="think-tokens-container">
+                            <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
+                                <span class="think-tokens-chevron">▼</span>
+                                <span class="think-tokens-label">Thinking...</span>
+                            </div>
+                            <div class="think-tokens-content">
+                                ${renderMarkdown(thinkContent)}
+                            </div>
+                        </div>
+                    `;
+                    return html;
+                }
+            }
+        }
+        // Fallback to normal markdown rendering
+        return renderMarkdown(text);
+    }
+    function toggleThinkTokens(header) {
+        const container = header.parentElement;
+        const content = container.querySelector('.think-tokens-content');
+        const chevron = header.querySelector('.think-tokens-chevron');
+        if (content.style.display === 'none') {
+            content.style.display = 'block';
+            chevron.textContent = '▼';
+            container.classList.remove('collapsed');
+        } else {
+            content.style.display = 'none';
+            chevron.textContent = '▶';
+            container.classList.add('collapsed');
+        }
     }
     async function sendMessage() {
@@ -379,8 +628,7 @@
         sendBtn.disabled = true;
         // Streaming OpenAI completions (placeholder, adapt as needed)
         let llmText = '';
-        appendMessage('llm', '...');
-        const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
+        const llmBubble = appendMessage('llm', '...');
         try {
             // Use the correct endpoint for chat completions
             const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
@@ -395,22 +643,40 @@
             if (!resp.body) throw new Error('No stream');
             const reader = resp.body.getReader();
             let decoder = new TextDecoder();
-            llmDiv.textContent = '';
+            llmBubble.textContent = '';
             while (true) {
                 const { done, value } = await reader.read();
                 if (done) break;
                 const chunk = decoder.decode(value);
                 if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
-                // Try to extract the content from the OpenAI chunk
-                const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
-                if (match && match[1]) {
-                    llmText += match[1];
-                    llmDiv.textContent = llmText;
+                // Handle Server-Sent Events format
+                const lines = chunk.split('\n');
+                for (const line of lines) {
+                    if (line.startsWith('data: ')) {
+                        const jsonStr = line.substring(6).trim();
+                        if (jsonStr === '[DONE]') continue;
+                        try {
+                            const parsed = JSON.parse(jsonStr);
+                            if (parsed.choices && parsed.choices[0] && parsed.choices[0].delta && parsed.choices[0].delta.content) {
+                                llmText += parsed.choices[0].delta.content;
+                                updateMessageContent(llmBubble, llmText, true);
+                            }
+                        } catch (e) {
+                            // Fallback to regex parsing if JSON parsing fails
+                            const match = jsonStr.match(/"content"\s*:\s*"((?:\\.|[^"\\])*)"/);
+                            if (match && match[1]) {
+                                llmText += unescapeJsonString(match[1]);
+                                updateMessageContent(llmBubble, llmText, true);
+                            }
+                        }
+                    }
                 }
             }
             messages.push({ role: 'assistant', content: llmText });
         } catch (e) {
-            llmDiv.textContent = '[Error: ' + e.message + ']';
+            llmBubble.textContent = '[Error: ' + e.message + ']';
         }
         sendBtn.disabled = false;
     }

lemonade/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "8.0.3"
1	+ __version__ = "8.0.4"

{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.3
+Version: 8.0.4
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.12
@@ -82,7 +82,7 @@ Dynamic: summary
 [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
 [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
-[![Made with Python](https://img.shields.io/badge/Python-3.8,3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
+[![Made with Python](https://img.shields.io/badge/Python-3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
 ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
@@ -97,8 +97,8 @@ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models
 The [Lemonade SDK](./docs/README.md) is comprised of the following:
 - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
-- 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
-- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
+- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
+- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
   - Prompting with templates.
   - Measuring accuracy with a variety of tests.
   - Benchmarking to get the time-to-first-token and tokens per second.
@@ -153,14 +153,7 @@ Maximum LLM performance requires the right hardware accelerator with the right i
   </tbody>
 </table>
-#### Inference Engines Overview
-| Engine | Description |
-| :--- | :--- |
-| **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
-| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
-| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
+To learn more about the supported hardware and software, visit the documentation [here](./docs/README.md#software-and-hardware-overview).
 ## Integrate Lemonade Server with Your Application

{lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
 lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
+lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -22,10 +22,10 @@ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
 lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
 lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
 lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
-lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
+lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
 lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
-lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
-lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
+lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
+lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
 lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
 lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
 lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
-lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
+lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
 lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
-lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
+lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
+lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
 lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
 lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
-lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
+lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
+lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
 lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
-lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
+lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
+lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
 lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
 lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
-lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
-lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
-lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
-lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
-lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.3.dist-info/RECORD,,
+lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
+lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
+lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
+lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
+lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.0.4.dist-info/RECORD,,

lemonade_server/model_manager.py CHANGED Viewed

@@ -54,6 +54,17 @@ class ModelManager:
                 for model_name, model_info in user_models.items()
             }
+            # Backwards compatibility for user models that were created before version 8.0.4
+            # "reasoning" was a boolean, but as of 8.0.4 it became a label
+            for _, model_info in user_models.items():
+                if "reasoning" in model_info:
+                    model_info["labels"] = (
+                        ["reasoning"]
+                        if not model_info["labels"]
+                        else model_info["labels"] + ["reasoning"]
+                    )
+                    del model_info["reasoning"]
             models.update(user_models)
         # Add the model name as a key in each entry, to make it easier
@@ -268,9 +279,8 @@ class ModelManager:
                 new_user_model = {
                     "checkpoint": checkpoint,
                     "recipe": recipe,
-                    "reasoning": reasoning,
                     "suggested": True,
-                    "labels": ["custom"],
+                    "labels": ["custom"] + (["reasoning"] if reasoning else []),
                 }
                 if mmproj:

lemonade_server/pydantic_models.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union, List, Any
 from pydantic import BaseModel
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
     response_format: dict | None = None
+class EmbeddingsRequest(BaseModel):
+    """
+    Request model for embeddings API endpoint.
+    Generates embeddings for the provided input text or tokens.
+    """
+    input: Union[str, List]
+    model: Optional[str] = None
+    encoding_format: Optional[str] = "float"  # "float" or "base64"
+class RerankingRequest(BaseModel):
+    """
+    Request model for reranking API endpoint.
+    Reranks a list of documents based on their relevance to a query.
+    """
+    query: str
+    documents: List[str]
+    model: str
 class ResponsesRequest(BaseModel):
     """
     Request model for responses API endpoint.

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl