lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -33,7 +33,47 @@
33
33
  <input type="text" id="chat-input" placeholder="Type your message..." />
34
34
  <button id="send-btn">Send</button>
35
35
  </div>
36
- </div>
36
+ </div>
37
+ <!-- App Suggestions Section -->
38
+ <div class="app-suggestions-section">
39
+ <div class="suggestion-text">
40
+ Use Lemonade with your favorite app
41
+ </div>
42
+ <div class="app-logos-grid">
43
+ <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" target="_blank" class="app-logo-item" title="Open WebUI">
44
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" class="app-logo-img">
45
+ <span class="app-name">Open WebUI</span>
46
+ </a>
47
+ <a href="https://lemonade-server.ai/docs/server/apps/continue/" target="_blank" class="app-logo-item" title="Continue">
48
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" class="app-logo-img">
49
+ <span class="app-name">Continue</span>
50
+ </a>
51
+ <a href="https://github.com/amd/gaia" target="_blank" class="app-logo-item" title="Gaia">
52
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" class="app-logo-img">
53
+ <span class="app-name">Gaia</span>
54
+ </a>
55
+ <a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" target="_blank" class="app-logo-item" title="AnythingLLM">
56
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" class="app-logo-img">
57
+ <span class="app-name">AnythingLLM</span>
58
+ </a>
59
+ <a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" target="_blank" class="app-logo-item" title="AI Dev Gallery">
60
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" class="app-logo-img">
61
+ <span class="app-name">AI Dev Gallery</span>
62
+ </a>
63
+ <a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" target="_blank" class="app-logo-item" title="LM-Eval">
64
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
65
+ <span class="app-name">LM-Eval</span>
66
+ </a>
67
+ <a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" target="_blank" class="app-logo-item" title="CodeGPT">
68
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" class="app-logo-img">
69
+ <span class="app-name">CodeGPT</span>
70
+ </a>
71
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
72
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
73
+ <span class="app-name">AI Toolkit</span>
74
+ </a>
75
+ </div>
76
+ </div>
37
77
  </div>
38
78
  <div class="tab-content" id="content-models"> <div class="model-mgmt-register-form collapsed"> <h3 class="model-mgmt-form-title" onclick="toggleAddModelForm()">
39
79
  Add a Model
@@ -109,7 +149,66 @@
109
149
  <div class="copyright">Copyright 2025 AMD</div>
110
150
  </footer>
111
151
  <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
112
- <script> // Tab switching logic
152
+ <script src="https://cdn.jsdelivr.net/npm/marked@9.1.0/marked.min.js"></script>
153
+ <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
154
+ <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
155
+ <script>
156
+ // Configure MathJax
157
+ window.MathJax = {
158
+ tex: {
159
+ inlineMath: [['\\(', '\\)'], ['$', '$']],
160
+ displayMath: [['\\[', '\\]'], ['$$', '$$']],
161
+ processEscapes: true,
162
+ processEnvironments: true
163
+ },
164
+ options: {
165
+ skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre']
166
+ }
167
+ };
168
+ </script>
169
+ <script>
170
+ // Configure marked.js for safe HTML rendering
171
+ marked.setOptions({
172
+ breaks: true,
173
+ gfm: true,
174
+ sanitize: false,
175
+ smartLists: true,
176
+ smartypants: true
177
+ });
178
+
179
+ // Function to unescape JSON strings
180
+ function unescapeJsonString(str) {
181
+ try {
182
+ return str.replace(/\\n/g, '\n')
183
+ .replace(/\\t/g, '\t')
184
+ .replace(/\\r/g, '\r')
185
+ .replace(/\\"/g, '"')
186
+ .replace(/\\\\/g, '\\');
187
+ } catch (error) {
188
+ console.error('Error unescaping string:', error);
189
+ return str;
190
+ }
191
+ }
192
+
193
+ // Function to safely render markdown with MathJax support
194
+ function renderMarkdown(text) {
195
+ try {
196
+ const html = marked.parse(text);
197
+ // Trigger MathJax to process the new content
198
+ if (window.MathJax && window.MathJax.typesetPromise) {
199
+ // Use a timeout to ensure DOM is updated before typesetting
200
+ setTimeout(() => {
201
+ window.MathJax.typesetPromise();
202
+ }, 0);
203
+ }
204
+ return html;
205
+ } catch (error) {
206
+ console.error('Error rendering markdown:', error);
207
+ return text; // fallback to plain text
208
+ }
209
+ }
210
+
211
+ // Tab switching logic
113
212
  function showTab(tab, updateHash = true) {
114
213
  document.getElementById('tab-chat').classList.remove('active');
115
214
  document.getElementById('tab-models').classList.remove('active');
@@ -163,6 +262,44 @@
163
262
  form.classList.toggle('collapsed');
164
263
  }
165
264
 
265
+ // Handle image load failures for app logos
266
+ function handleImageFailure(img) {
267
+ const logoItem = img.closest('.app-logo-item');
268
+ if (logoItem) {
269
+ logoItem.classList.add('image-failed');
270
+ }
271
+ }
272
+
273
+ // Set up image error handlers when DOM is loaded
274
+ document.addEventListener('DOMContentLoaded', function() {
275
+ const logoImages = document.querySelectorAll('.app-logo-img');
276
+ logoImages.forEach(function(img) {
277
+ let imageLoaded = false;
278
+
279
+ img.addEventListener('load', function() {
280
+ imageLoaded = true;
281
+ });
282
+
283
+ img.addEventListener('error', function() {
284
+ if (!imageLoaded) {
285
+ handleImageFailure(this);
286
+ }
287
+ });
288
+
289
+ // Also check if image is already broken (cached failure)
290
+ if (img.complete && img.naturalWidth === 0) {
291
+ handleImageFailure(img);
292
+ }
293
+
294
+ // Timeout fallback for slow connections (5 seconds)
295
+ setTimeout(function() {
296
+ if (!imageLoaded && !img.complete) {
297
+ handleImageFailure(img);
298
+ }
299
+ }, 5000);
300
+ });
301
+ });
302
+
166
303
  // Helper to get server base URL
167
304
  function getServerBaseUrl() {
168
305
  const port = window.SERVER_PORT || 8000;
@@ -184,17 +321,37 @@
184
321
  select.innerHTML = '<option>No models available</option>';
185
322
  return;
186
323
  }
324
+
325
+ // Filter out embedding models from chat interface
326
+ const allModels = window.SERVER_MODELS || {};
327
+ let filteredModels = [];
187
328
  let defaultIndex = 0;
188
- data.data.forEach(function(model, index) {
329
+
330
+ data.data.forEach(function(model) {
189
331
  const modelId = model.id || model.name || model;
332
+ const modelInfo = allModels[modelId] || {};
333
+ const labels = modelInfo.labels || [];
334
+
335
+ // Skip models with "embeddings" or "reranking" label
336
+ if (labels.includes('embeddings') || labels.includes('reranking')) {
337
+ return;
338
+ }
339
+
340
+ filteredModels.push(modelId);
190
341
  const opt = document.createElement('option');
191
342
  opt.value = modelId;
192
343
  opt.textContent = modelId;
193
344
  if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
194
- defaultIndex = index;
345
+ defaultIndex = filteredModels.length - 1;
195
346
  }
196
347
  select.appendChild(opt);
197
348
  });
349
+
350
+ if (filteredModels.length === 0) {
351
+ select.innerHTML = '<option>No chat models available</option>';
352
+ return;
353
+ }
354
+
198
355
  select.selectedIndex = defaultIndex;
199
356
  } catch (e) {
200
357
  const select = document.getElementById('model-select');
@@ -217,26 +374,24 @@
217
374
 
218
375
  // Add labels if they exist
219
376
  const modelData = allModels[modelId];
220
- if (modelData) {
221
- // Add reasoning label if reasoning is true
222
- if (modelData.reasoning === true) {
223
- const reasoningLabel = document.createElement('span');
224
- reasoningLabel.className = 'model-label reasoning';
225
- reasoningLabel.textContent = 'reasoning';
226
- container.appendChild(reasoningLabel);
227
- }
228
-
229
- // Add other labels if they exist
230
- if (modelData.labels && Array.isArray(modelData.labels)) {
231
- modelData.labels.forEach(label => {
232
- const labelSpan = document.createElement('span');
233
- const labelLower = label.toLowerCase();
234
- const labelClass = (labelLower === 'vision') ? 'vision' : 'other';
235
- labelSpan.className = `model-label ${labelClass}`;
236
- labelSpan.textContent = label;
237
- container.appendChild(labelSpan);
238
- });
239
- }
377
+ if (modelData && modelData.labels && Array.isArray(modelData.labels)) {
378
+ modelData.labels.forEach(label => {
379
+ const labelSpan = document.createElement('span');
380
+ const labelLower = label.toLowerCase();
381
+ let labelClass = 'other';
382
+ if (labelLower === 'vision') {
383
+ labelClass = 'vision';
384
+ } else if (labelLower === 'embeddings') {
385
+ labelClass = 'embeddings';
386
+ } else if (labelLower === 'reasoning') {
387
+ labelClass = 'reasoning';
388
+ } else if (labelLower === 'reranking') {
389
+ labelClass = 'reranking';
390
+ }
391
+ labelSpan.className = `model-label ${labelClass}`;
392
+ labelSpan.textContent = label;
393
+ container.appendChild(labelSpan);
394
+ });
240
395
  }
241
396
 
242
397
  return container;
@@ -358,16 +513,110 @@
358
513
  const modelSelect = document.getElementById('model-select');
359
514
  let messages = [];
360
515
 
361
- function appendMessage(role, text) {
516
+ function appendMessage(role, text, isMarkdown = false) {
362
517
  const div = document.createElement('div');
363
518
  div.className = 'chat-message ' + role;
364
519
  // Add a bubble for iMessage style
365
520
  const bubble = document.createElement('div');
366
521
  bubble.className = 'chat-bubble ' + role;
367
- bubble.innerHTML = text;
522
+
523
+ if (role === 'llm' && isMarkdown) {
524
+ bubble.innerHTML = renderMarkdownWithThinkTokens(text);
525
+ } else {
526
+ bubble.textContent = text;
527
+ }
528
+
368
529
  div.appendChild(bubble);
369
530
  chatHistory.appendChild(div);
370
531
  chatHistory.scrollTop = chatHistory.scrollHeight;
532
+ return bubble; // Return the bubble element for streaming updates
533
+ }
534
+
535
+ function updateMessageContent(bubbleElement, text, isMarkdown = false) {
536
+ if (isMarkdown) {
537
+ bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
538
+ } else {
539
+ bubbleElement.textContent = text;
540
+ }
541
+ }
542
+
543
+ function renderMarkdownWithThinkTokens(text) {
544
+ // Check if text contains opening think tag
545
+ if (text.includes('<think>')) {
546
+ if (text.includes('</think>')) {
547
+ // Complete think block - handle as before
548
+ const thinkMatch = text.match(/<think>(.*?)<\/think>/s);
549
+ if (thinkMatch) {
550
+ const thinkContent = thinkMatch[1].trim();
551
+ const mainResponse = text.replace(/<think>.*?<\/think>/s, '').trim();
552
+
553
+ // Create collapsible structure
554
+ let html = '';
555
+ if (thinkContent) {
556
+ html += `
557
+ <div class="think-tokens-container">
558
+ <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
559
+ <span class="think-tokens-chevron">▼</span>
560
+ <span class="think-tokens-label">Thinking...</span>
561
+ </div>
562
+ <div class="think-tokens-content">
563
+ ${renderMarkdown(thinkContent)}
564
+ </div>
565
+ </div>
566
+ `;
567
+ }
568
+ if (mainResponse) {
569
+ html += `<div class="main-response">${renderMarkdown(mainResponse)}</div>`;
570
+ }
571
+ return html;
572
+ }
573
+ } else {
574
+ // Partial think block - only opening tag found, still being generated
575
+ const thinkMatch = text.match(/<think>(.*)/s);
576
+ if (thinkMatch) {
577
+ const thinkContent = thinkMatch[1];
578
+ const beforeThink = text.substring(0, text.indexOf('<think>'));
579
+
580
+ let html = '';
581
+ if (beforeThink.trim()) {
582
+ html += `<div class="main-response">${renderMarkdown(beforeThink)}</div>`;
583
+ }
584
+
585
+ html += `
586
+ <div class="think-tokens-container">
587
+ <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
588
+ <span class="think-tokens-chevron">▼</span>
589
+ <span class="think-tokens-label">Thinking...</span>
590
+ </div>
591
+ <div class="think-tokens-content">
592
+ ${renderMarkdown(thinkContent)}
593
+ </div>
594
+ </div>
595
+ `;
596
+
597
+ return html;
598
+ }
599
+ }
600
+ }
601
+
602
+ // Fallback to normal markdown rendering
603
+ return renderMarkdown(text);
604
+ }
605
+
606
+ function toggleThinkTokens(header) {
607
+ const container = header.parentElement;
608
+ const content = container.querySelector('.think-tokens-content');
609
+ const chevron = header.querySelector('.think-tokens-chevron');
610
+
611
+ if (content.style.display === 'none') {
612
+ content.style.display = 'block';
613
+ chevron.textContent = '▼';
614
+ container.classList.remove('collapsed');
615
+ } else {
616
+ content.style.display = 'none';
617
+ chevron.textContent = '▶';
618
+ container.classList.add('collapsed');
619
+ }
371
620
  }
372
621
 
373
622
  async function sendMessage() {
@@ -379,8 +628,7 @@
379
628
  sendBtn.disabled = true;
380
629
  // Streaming OpenAI completions (placeholder, adapt as needed)
381
630
  let llmText = '';
382
- appendMessage('llm', '...');
383
- const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
631
+ const llmBubble = appendMessage('llm', '...');
384
632
  try {
385
633
  // Use the correct endpoint for chat completions
386
634
  const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
@@ -395,22 +643,40 @@
395
643
  if (!resp.body) throw new Error('No stream');
396
644
  const reader = resp.body.getReader();
397
645
  let decoder = new TextDecoder();
398
- llmDiv.textContent = '';
646
+ llmBubble.textContent = '';
399
647
  while (true) {
400
648
  const { done, value } = await reader.read();
401
649
  if (done) break;
402
650
  const chunk = decoder.decode(value);
403
651
  if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
404
- // Try to extract the content from the OpenAI chunk
405
- const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
406
- if (match && match[1]) {
407
- llmText += match[1];
408
- llmDiv.textContent = llmText;
652
+
653
+ // Handle Server-Sent Events format
654
+ const lines = chunk.split('\n');
655
+ for (const line of lines) {
656
+ if (line.startsWith('data: ')) {
657
+ const jsonStr = line.substring(6).trim();
658
+ if (jsonStr === '[DONE]') continue;
659
+
660
+ try {
661
+ const parsed = JSON.parse(jsonStr);
662
+ if (parsed.choices && parsed.choices[0] && parsed.choices[0].delta && parsed.choices[0].delta.content) {
663
+ llmText += parsed.choices[0].delta.content;
664
+ updateMessageContent(llmBubble, llmText, true);
665
+ }
666
+ } catch (e) {
667
+ // Fallback to regex parsing if JSON parsing fails
668
+ const match = jsonStr.match(/"content"\s*:\s*"((?:\\.|[^"\\])*)"/);
669
+ if (match && match[1]) {
670
+ llmText += unescapeJsonString(match[1]);
671
+ updateMessageContent(llmBubble, llmText, true);
672
+ }
673
+ }
674
+ }
409
675
  }
410
676
  }
411
677
  messages.push({ role: 'assistant', content: llmText });
412
678
  } catch (e) {
413
- llmDiv.textContent = '[Error: ' + e.message + ']';
679
+ llmBubble.textContent = '[Error: ' + e.message + ']';
414
680
  }
415
681
  sendBtn.disabled = false;
416
682
  }
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.0.3"
1
+ __version__ = "8.0.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.0.3
3
+ Version: 8.0.4
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -82,7 +82,7 @@ Dynamic: summary
82
82
 
83
83
  [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
84
84
  [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
85
- [![Made with Python](https://img.shields.io/badge/Python-3.8,3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
85
+ [![Made with Python](https://img.shields.io/badge/Python-3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
86
86
 
87
87
  ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
88
88
 
@@ -97,8 +97,8 @@ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models
97
97
  The [Lemonade SDK](./docs/README.md) is comprised of the following:
98
98
 
99
99
  - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
100
- - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
101
- - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
100
+ - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
101
+ - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
102
102
  - Prompting with templates.
103
103
  - Measuring accuracy with a variety of tests.
104
104
  - Benchmarking to get the time-to-first-token and tokens per second.
@@ -153,14 +153,7 @@ Maximum LLM performance requires the right hardware accelerator with the right i
153
153
  </tbody>
154
154
  </table>
155
155
 
156
-
157
-
158
- #### Inference Engines Overview
159
- | Engine | Description |
160
- | :--- | :--- |
161
- | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
162
- | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
163
- | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
156
+ To learn more about the supported hardware and software, visit the documentation [here](./docs/README.md#software-and-hardware-overview).
164
157
 
165
158
  ## Integrate Lemonade Server with Your Application
166
159
 
@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
4
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=GImAlzwPDxsACkYFf5rTrX8QMH23tcqdm6vgjfFYD10,22
7
+ lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -22,10 +22,10 @@ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
22
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
23
  lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
24
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
- lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
25
+ lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
26
26
  lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
- lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
- lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
27
+ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
28
+ lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
29
29
  lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
30
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
31
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
35
35
  lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
36
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
- lemonade/tools/oga/load.py,sha256=xSP0DWoGd5zBRozSafj1MMyIQyHJuIRj_vNlCTx8mfs,28309
38
+ lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
39
39
  lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
40
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
- lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
41
+ lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
42
+ lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
43
43
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
45
  lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
46
46
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- lemonade/tools/server/llamacpp.py,sha256=vjFNelm_VyKBBgWmltsAwLI7ncQ9AwVFQD7krZnF42w,16199
48
- lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
47
+ lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
48
+ lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
49
49
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
50
  lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
51
51
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
52
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
- lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
- lemonade/tools/server/static/webapp.html,sha256=kPzORaogVRdFQewXyNI_JaH2ZZCTaq5zfMSyzuoFTuA,22414
53
+ lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
54
+ lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
55
55
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
56
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
57
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
58
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
59
  lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
- lemonade_sdk-8.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
- lemonade_sdk-8.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
60
+ lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
62
  lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
63
- lemonade_server/model_manager.py,sha256=Yvlsl0wipKfryKULH5ASQ9INhLQXPq9dTGQVBXf2_h0,16167
64
- lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
65
- lemonade_server/server_models.json,sha256=O5zk94gH_zRq6GSwbqvi2SNwx51eY9uqgAl_kxTi0iM,7271
66
- lemonade_sdk-8.0.3.dist-info/METADATA,sha256=WesWziLri9jQjZILRENliiJbggTVF8LmXKVIERInVbE,8285
67
- lemonade_sdk-8.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- lemonade_sdk-8.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
- lemonade_sdk-8.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
- lemonade_sdk-8.0.3.dist-info/RECORD,,
63
+ lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
64
+ lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
65
+ lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
66
+ lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
67
+ lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.4.dist-info/RECORD,,
@@ -54,6 +54,17 @@ class ModelManager:
54
54
  for model_name, model_info in user_models.items()
55
55
  }
56
56
 
57
+ # Backwards compatibility for user models that were created before version 8.0.4
58
+ # "reasoning" was a boolean, but as of 8.0.4 it became a label
59
+ for _, model_info in user_models.items():
60
+ if "reasoning" in model_info:
61
+ model_info["labels"] = (
62
+ ["reasoning"]
63
+ if not model_info["labels"]
64
+ else model_info["labels"] + ["reasoning"]
65
+ )
66
+ del model_info["reasoning"]
67
+
57
68
  models.update(user_models)
58
69
 
59
70
  # Add the model name as a key in each entry, to make it easier
@@ -268,9 +279,8 @@ class ModelManager:
268
279
  new_user_model = {
269
280
  "checkpoint": checkpoint,
270
281
  "recipe": recipe,
271
- "reasoning": reasoning,
272
282
  "suggested": True,
273
- "labels": ["custom"],
283
+ "labels": ["custom"] + (["reasoning"] if reasoning else []),
274
284
  }
275
285
 
276
286
  if mmproj:
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Union, List, Any
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
65
65
  response_format: dict | None = None
66
66
 
67
67
 
68
+ class EmbeddingsRequest(BaseModel):
69
+ """
70
+ Request model for embeddings API endpoint.
71
+
72
+ Generates embeddings for the provided input text or tokens.
73
+ """
74
+
75
+ input: Union[str, List]
76
+ model: Optional[str] = None
77
+ encoding_format: Optional[str] = "float" # "float" or "base64"
78
+
79
+
80
+ class RerankingRequest(BaseModel):
81
+ """
82
+ Request model for reranking API endpoint.
83
+
84
+ Reranks a list of documents based on their relevance to a query.
85
+ """
86
+
87
+ query: str
88
+ documents: List[str]
89
+ model: str
90
+
91
+
68
92
  class ResponsesRequest(BaseModel):
69
93
  """
70
94
  Request model for responses API endpoint.