lemonade-sdk 8.0.2__py3-none-any.whl → 8.0.4__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of lemonade-sdk might be problematic.

@@ -33,7 +33,47 @@
  <input type="text" id="chat-input" placeholder="Type your message..." />
  <button id="send-btn">Send</button>
  </div>
- </div>
+ </div>
+ <!-- App Suggestions Section -->
+ <div class="app-suggestions-section">
+ <div class="suggestion-text">
+ Use Lemonade with your favorite app
+ </div>
+ <div class="app-logos-grid">
+ <a href="https://lemonade-server.ai/docs/server/apps/open-webui/" target="_blank" class="app-logo-item" title="Open WebUI">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" class="app-logo-img">
+ <span class="app-name">Open WebUI</span>
+ </a>
+ <a href="https://lemonade-server.ai/docs/server/apps/continue/" target="_blank" class="app-logo-item" title="Continue">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" class="app-logo-img">
+ <span class="app-name">Continue</span>
+ </a>
+ <a href="https://github.com/amd/gaia" target="_blank" class="app-logo-item" title="Gaia">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" class="app-logo-img">
+ <span class="app-name">Gaia</span>
+ </a>
+ <a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" target="_blank" class="app-logo-item" title="AnythingLLM">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" class="app-logo-img">
+ <span class="app-name">AnythingLLM</span>
+ </a>
+ <a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" target="_blank" class="app-logo-item" title="AI Dev Gallery">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" class="app-logo-img">
+ <span class="app-name">AI Dev Gallery</span>
+ </a>
+ <a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" target="_blank" class="app-logo-item" title="LM-Eval">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
+ <span class="app-name">LM-Eval</span>
+ </a>
+ <a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" target="_blank" class="app-logo-item" title="CodeGPT">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" class="app-logo-img">
+ <span class="app-name">CodeGPT</span>
+ </a>
+ <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
+ <span class="app-name">AI Toolkit</span>
+ </a>
+ </div>
+ </div>
  </div>
  <div class="tab-content" id="content-models"> <div class="model-mgmt-register-form collapsed"> <h3 class="model-mgmt-form-title" onclick="toggleAddModelForm()">
  Add a Model
@@ -109,27 +149,157 @@
  <div class="copyright">Copyright 2025 AMD</div>
  </footer>
  <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
- <script> // Tab switching logic
- function showTab(tab) {
+ <script src="https://cdn.jsdelivr.net/npm/marked@9.1.0/marked.min.js"></script>
+ <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
+ <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+ <script>
+ // Configure MathJax
+ window.MathJax = {
+ tex: {
+ inlineMath: [['\\(', '\\)'], ['$', '$']],
+ displayMath: [['\\[', '\\]'], ['$$', '$$']],
+ processEscapes: true,
+ processEnvironments: true
+ },
+ options: {
+ skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre']
+ }
+ };
+ </script>
+ <script>
+ // Configure marked.js for safe HTML rendering
+ marked.setOptions({
+ breaks: true,
+ gfm: true,
+ sanitize: false,
+ smartLists: true,
+ smartypants: true
+ });
+
+ // Function to unescape JSON strings
+ function unescapeJsonString(str) {
+ try {
+ return str.replace(/\\n/g, '\n')
+ .replace(/\\t/g, '\t')
+ .replace(/\\r/g, '\r')
+ .replace(/\\"/g, '"')
+ .replace(/\\\\/g, '\\');
+ } catch (error) {
+ console.error('Error unescaping string:', error);
+ return str;
+ }
+ }
+
+ // Function to safely render markdown with MathJax support
+ function renderMarkdown(text) {
+ try {
+ const html = marked.parse(text);
+ // Trigger MathJax to process the new content
+ if (window.MathJax && window.MathJax.typesetPromise) {
+ // Use a timeout to ensure DOM is updated before typesetting
+ setTimeout(() => {
+ window.MathJax.typesetPromise();
+ }, 0);
+ }
+ return html;
+ } catch (error) {
+ console.error('Error rendering markdown:', error);
+ return text; // fallback to plain text
+ }
+ }
+
+ // Tab switching logic
+ function showTab(tab, updateHash = true) {
  document.getElementById('tab-chat').classList.remove('active');
  document.getElementById('tab-models').classList.remove('active');
  document.getElementById('content-chat').classList.remove('active');
  document.getElementById('content-models').classList.remove('active');
  if (tab === 'chat') {
  document.getElementById('tab-chat').classList.add('active');
- document.getElementById('content-chat').classList.add('active');
+ document.getElementById('content-chat').classList.add('active');
+ if (updateHash) {
+ window.location.hash = 'llm-chat';
+ }
  } else {
  document.getElementById('tab-models').classList.add('active');
- document.getElementById('content-models').classList.add('active');
+ document.getElementById('content-models').classList.add('active');
+ if (updateHash) {
+ window.location.hash = 'model-management';
+ }
  }
  }
 
+ // Handle hash changes for anchor navigation
+ function handleHashChange() {
+ const hash = window.location.hash.slice(1); // Remove the # symbol
+ if (hash === 'llm-chat') {
+ showTab('chat', false);
+ } else if (hash === 'model-management') {
+ showTab('models', false);
+ }
+ }
+
+ // Initialize tab based on URL hash on page load
+ function initializeTabFromHash() {
+ const hash = window.location.hash.slice(1);
+ if (hash === 'llm-chat') {
+ showTab('chat', false);
+ } else if (hash === 'model-management') {
+ showTab('models', false);
+ }
+ // If no hash or unrecognized hash, keep default (chat tab is already active)
+ }
+
+ // Listen for hash changes
+ window.addEventListener('hashchange', handleHashChange);
+
+ // Initialize on page load
+ document.addEventListener('DOMContentLoaded', initializeTabFromHash);
+
  // Toggle Add Model form
  function toggleAddModelForm() {
  const form = document.querySelector('.model-mgmt-register-form');
  form.classList.toggle('collapsed');
  }
 
+ // Handle image load failures for app logos
+ function handleImageFailure(img) {
+ const logoItem = img.closest('.app-logo-item');
+ if (logoItem) {
+ logoItem.classList.add('image-failed');
+ }
+ }
+
+ // Set up image error handlers when DOM is loaded
+ document.addEventListener('DOMContentLoaded', function() {
+ const logoImages = document.querySelectorAll('.app-logo-img');
+ logoImages.forEach(function(img) {
+ let imageLoaded = false;
+
+ img.addEventListener('load', function() {
+ imageLoaded = true;
+ });
+
+ img.addEventListener('error', function() {
+ if (!imageLoaded) {
+ handleImageFailure(this);
+ }
+ });
+
+ // Also check if image is already broken (cached failure)
+ if (img.complete && img.naturalWidth === 0) {
+ handleImageFailure(img);
+ }
+
+ // Timeout fallback for slow connections (5 seconds)
+ setTimeout(function() {
+ if (!imageLoaded && !img.complete) {
+ handleImageFailure(img);
+ }
+ }, 5000);
+ });
+ });
+
  // Helper to get server base URL
  function getServerBaseUrl() {
  const port = window.SERVER_PORT || 8000;
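Note on `unescapeJsonString`: it hand-rolls JSON escape handling as a fallback for stream chunks that fail `JSON.parse`. Chained replacements like this are order-sensitive, which a real JSON parser sidesteps. A minimal Python sketch of the same idea, for illustration only (these function names are not part of the SDK):

```python
import json

# Hypothetical mirror of the web app's unescapeJsonString fallback.
# Chained replacements are order-sensitive: in the JSON text r"a\\nb"
# (escaped backslash, then "n"), the "\n" substring gets rewritten to a
# newline before the "\\" rule can claim its backslashes.
def unescape_manual(s: str) -> str:
    return (
        s.replace("\\n", "\n")
        .replace("\\t", "\t")
        .replace("\\r", "\r")
        .replace('\\"', '"')
        .replace("\\\\", "\\")
    )

# A JSON parser applies every escape rule in one left-to-right pass,
# assuming the fragment is a well-formed JSON string body.
def unescape_with_parser(s: str) -> str:
    return json.loads(f'"{s}"')

print(repr(unescape_manual(r"a\\nb")))       # 'a\\\nb' -- backslash + newline (wrong)
print(repr(unescape_with_parser(r"a\\nb")))  # 'a\\nb'  -- backslash + 'n' (correct)
```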
@@ -151,17 +321,37 @@
  select.innerHTML = '<option>No models available</option>';
  return;
  }
+
+ // Filter out embedding models from chat interface
+ const allModels = window.SERVER_MODELS || {};
+ let filteredModels = [];
  let defaultIndex = 0;
- data.data.forEach(function(model, index) {
+
+ data.data.forEach(function(model) {
  const modelId = model.id || model.name || model;
+ const modelInfo = allModels[modelId] || {};
+ const labels = modelInfo.labels || [];
+
+ // Skip models with "embeddings" or "reranking" label
+ if (labels.includes('embeddings') || labels.includes('reranking')) {
+ return;
+ }
+
+ filteredModels.push(modelId);
  const opt = document.createElement('option');
  opt.value = modelId;
  opt.textContent = modelId;
  if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
- defaultIndex = index;
+ defaultIndex = filteredModels.length - 1;
  }
  select.appendChild(opt);
  });
+
+ if (filteredModels.length === 0) {
+ select.innerHTML = '<option>No chat models available</option>';
+ return;
+ }
+
  select.selectedIndex = defaultIndex;
  } catch (e) {
  const select = document.getElementById('model-select');
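For illustration, the same label-based filtering rule in Python. The dict shape mirrors the page's `window.SERVER_MODELS` map; the embedding and reranking model names below are made up for the example:

```python
# Models whose labels include "embeddings" or "reranking" are hidden
# from the chat dropdown, matching the JavaScript above.
server_models = {
    "Llama-3.2-1B-Instruct-Hybrid": {"labels": []},
    "example-embedding-model": {"labels": ["embeddings"]},
    "example-reranking-model": {"labels": ["reranking"]},
}

HIDDEN_LABELS = {"embeddings", "reranking"}

chat_models = [
    model_id
    for model_id, info in server_models.items()
    if not HIDDEN_LABELS.intersection(info.get("labels", []))
]
print(chat_models)  # ['Llama-3.2-1B-Instruct-Hybrid']
```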
@@ -184,26 +374,24 @@
 
  // Add labels if they exist
  const modelData = allModels[modelId];
- if (modelData) {
- // Add reasoning label if reasoning is true
- if (modelData.reasoning === true) {
- const reasoningLabel = document.createElement('span');
- reasoningLabel.className = 'model-label reasoning';
- reasoningLabel.textContent = 'reasoning';
- container.appendChild(reasoningLabel);
- }
-
- // Add other labels if they exist
- if (modelData.labels && Array.isArray(modelData.labels)) {
- modelData.labels.forEach(label => {
- const labelSpan = document.createElement('span');
- const labelLower = label.toLowerCase();
- const labelClass = (labelLower === 'vision') ? 'vision' : 'other';
- labelSpan.className = `model-label ${labelClass}`;
- labelSpan.textContent = label;
- container.appendChild(labelSpan);
- });
- }
+ if (modelData && modelData.labels && Array.isArray(modelData.labels)) {
+ modelData.labels.forEach(label => {
+ const labelSpan = document.createElement('span');
+ const labelLower = label.toLowerCase();
+ let labelClass = 'other';
+ if (labelLower === 'vision') {
+ labelClass = 'vision';
+ } else if (labelLower === 'embeddings') {
+ labelClass = 'embeddings';
+ } else if (labelLower === 'reasoning') {
+ labelClass = 'reasoning';
+ } else if (labelLower === 'reranking') {
+ labelClass = 'reranking';
+ }
+ labelSpan.className = `model-label ${labelClass}`;
+ labelSpan.textContent = label;
+ container.appendChild(labelSpan);
+ });
  }
 
  return container;
@@ -325,16 +513,110 @@
  const modelSelect = document.getElementById('model-select');
  let messages = [];
 
- function appendMessage(role, text) {
+ function appendMessage(role, text, isMarkdown = false) {
  const div = document.createElement('div');
  div.className = 'chat-message ' + role;
  // Add a bubble for iMessage style
  const bubble = document.createElement('div');
  bubble.className = 'chat-bubble ' + role;
- bubble.innerHTML = text;
+
+ if (role === 'llm' && isMarkdown) {
+ bubble.innerHTML = renderMarkdownWithThinkTokens(text);
+ } else {
+ bubble.textContent = text;
+ }
+
  div.appendChild(bubble);
  chatHistory.appendChild(div);
  chatHistory.scrollTop = chatHistory.scrollHeight;
+ return bubble; // Return the bubble element for streaming updates
+ }
+
+ function updateMessageContent(bubbleElement, text, isMarkdown = false) {
+ if (isMarkdown) {
+ bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
+ } else {
+ bubbleElement.textContent = text;
+ }
+ }
+
+ function renderMarkdownWithThinkTokens(text) {
+ // Check if text contains opening think tag
+ if (text.includes('<think>')) {
+ if (text.includes('</think>')) {
+ // Complete think block - handle as before
+ const thinkMatch = text.match(/<think>(.*?)<\/think>/s);
+ if (thinkMatch) {
+ const thinkContent = thinkMatch[1].trim();
+ const mainResponse = text.replace(/<think>.*?<\/think>/s, '').trim();
+
+ // Create collapsible structure
+ let html = '';
+ if (thinkContent) {
+ html += `
+ <div class="think-tokens-container">
+ <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
+ <span class="think-tokens-chevron">▼</span>
+ <span class="think-tokens-label">Thinking...</span>
+ </div>
+ <div class="think-tokens-content">
+ ${renderMarkdown(thinkContent)}
+ </div>
+ </div>
+ `;
+ }
+ if (mainResponse) {
+ html += `<div class="main-response">${renderMarkdown(mainResponse)}</div>`;
+ }
+ return html;
+ }
+ } else {
+ // Partial think block - only opening tag found, still being generated
+ const thinkMatch = text.match(/<think>(.*)/s);
+ if (thinkMatch) {
+ const thinkContent = thinkMatch[1];
+ const beforeThink = text.substring(0, text.indexOf('<think>'));
+
+ let html = '';
+ if (beforeThink.trim()) {
+ html += `<div class="main-response">${renderMarkdown(beforeThink)}</div>`;
+ }
+
+ html += `
+ <div class="think-tokens-container">
+ <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
+ <span class="think-tokens-chevron">▼</span>
+ <span class="think-tokens-label">Thinking...</span>
+ </div>
+ <div class="think-tokens-content">
+ ${renderMarkdown(thinkContent)}
+ </div>
+ </div>
+ `;
+
+ return html;
+ }
+ }
+ }
+
+ // Fallback to normal markdown rendering
+ return renderMarkdown(text);
+ }
+
+ function toggleThinkTokens(header) {
+ const container = header.parentElement;
+ const content = container.querySelector('.think-tokens-content');
+ const chevron = header.querySelector('.think-tokens-chevron');
+
+ if (content.style.display === 'none') {
+ content.style.display = 'block';
+ chevron.textContent = '▼';
+ container.classList.remove('collapsed');
+ } else {
+ content.style.display = 'none';
+ chevron.textContent = '▶';
+ container.classList.add('collapsed');
+ }
  }
 
  async function sendMessage() {
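The `<think>` handling above splits reasoning-model output into a collapsible "Thinking..." section plus the main response, and treats a lone opening tag as a still-streaming think block. A minimal Python sketch of that splitting logic (names are illustrative, not part of the SDK):

```python
import re

# A complete <think>...</think> block yields (think, main); a lone
# opening tag means the closing tag has not streamed in yet, so
# everything after it is treated as partial think content.
THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

def split_think_tokens(text):
    match = THINK_RE.search(text)
    if match:
        think = match.group(1).strip()
        main = THINK_RE.sub("", text, count=1).strip()
        return think, main
    if "<think>" in text:
        before, _, partial = text.partition("<think>")
        return partial, before.strip()
    return None, text

print(split_think_tokens("<think>plan the answer</think>It is 42."))
# ('plan the answer', 'It is 42.')
```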
@@ -346,8 +628,7 @@
  sendBtn.disabled = true;
  // Streaming OpenAI completions (placeholder, adapt as needed)
  let llmText = '';
- appendMessage('llm', '...');
- const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
+ const llmBubble = appendMessage('llm', '...');
  try {
  // Use the correct endpoint for chat completions
  const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
@@ -362,22 +643,40 @@
  if (!resp.body) throw new Error('No stream');
  const reader = resp.body.getReader();
  let decoder = new TextDecoder();
- llmDiv.textContent = '';
+ llmBubble.textContent = '';
  while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  const chunk = decoder.decode(value);
  if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
- // Try to extract the content from the OpenAI chunk
- const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
- if (match && match[1]) {
- llmText += match[1];
- llmDiv.textContent = llmText;
+
+ // Handle Server-Sent Events format
+ const lines = chunk.split('\n');
+ for (const line of lines) {
+ if (line.startsWith('data: ')) {
+ const jsonStr = line.substring(6).trim();
+ if (jsonStr === '[DONE]') continue;
+
+ try {
+ const parsed = JSON.parse(jsonStr);
+ if (parsed.choices && parsed.choices[0] && parsed.choices[0].delta && parsed.choices[0].delta.content) {
+ llmText += parsed.choices[0].delta.content;
+ updateMessageContent(llmBubble, llmText, true);
+ }
+ } catch (e) {
+ // Fallback to regex parsing if JSON parsing fails
+ const match = jsonStr.match(/"content"\s*:\s*"((?:\\.|[^"\\])*)"/);
+ if (match && match[1]) {
+ llmText += unescapeJsonString(match[1]);
+ updateMessageContent(llmBubble, llmText, true);
+ }
+ }
+ }
  }
  }
  messages.push({ role: 'assistant', content: llmText });
  } catch (e) {
- llmDiv.textContent = '[Error: ' + e.message + ']';
+ llmBubble.textContent = '[Error: ' + e.message + ']';
  }
  sendBtn.disabled = false;
  }
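The browser code parses the SSE stream by hand; from Python, the same endpoint can be consumed with the `openai` package (already a lemonade-sdk dependency), which handles the `data:` framing and delta decoding. A sketch assuming the default port 8000 and `/api/v1` prefix used by `getServerBaseUrl()`; adjust if your server runs elsewhere:

```python
from openai import OpenAI

# Lemonade Server speaks the OpenAI API standard, so the stock client works.
client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="unused")

stream = client.chat.completions.create(
    model="Llama-3.2-1B-Instruct-Hybrid",  # the web UI's default model
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # deltas can be None (e.g., role-only chunks)
        print(delta, end="", flush=True)
print()
```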
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
  """
  webbrowser.open("https://lemonade-server.ai/docs/")
 
+ def open_llm_chat(self, _, __):
+ """
+ Open the LLM chat in the default web browser.
+ """
+ webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
+
  def open_model_manager(self, _, __):
  """
  Open the model manager in the default web browser.
  """
- webbrowser.open(f"http://localhost:{self.port}/")
+ webbrowser.open(f"http://localhost:{self.port}/#model-management")
 
  def check_server_state(self):
  """
@@ -339,16 +345,25 @@ class LemonadeTray(SystemTray):
 
  # Create menu items for all downloaded models
  model_menu_items = []
- for model_name, _ in self.downloaded_models.items():
- # Create a function that returns the lambda to properly capture the variables
- def create_handler(mod):
- return lambda icon, item: self.load_llm(icon, item, mod)
+ if not self.downloaded_models:
+ model_menu_items.append(
+ MenuItem(
+ "No models available: Use the Model Manager to pull models",
+ None,
+ enabled=False,
+ )
+ )
+ else:
+ for model_name, _ in self.downloaded_models.items():
+ # Create a function that returns the lambda to properly capture the variables
+ def create_handler(mod):
+ return lambda icon, item: self.load_llm(icon, item, mod)
 
- model_item = MenuItem(model_name, create_handler(model_name))
+ model_item = MenuItem(model_name, create_handler(model_name))
 
- # Set checked property instead of modifying the text
- model_item.checked = model_name == self.loaded_llm
- model_menu_items.append(model_item)
+ # Set checked property instead of modifying the text
+ model_item.checked = model_name == self.loaded_llm
+ model_menu_items.append(model_item)
 
  load_submenu = Menu(*model_menu_items)
 
@@ -391,6 +406,7 @@ class LemonadeTray(SystemTray):
  )
 
  items.append(MenuItem("Documentation", self.open_documentation))
+ items.append(MenuItem("LLM Chat", self.open_llm_chat))
  items.append(MenuItem("Model Manager", self.open_model_manager))
  items.append(MenuItem("Show Logs", self.show_logs))
  items.append(Menu.SEPARATOR)
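The `create_handler` wrapper in the menu-building code above exists because Python closures capture variables, not values: a lambda created directly in the loop would see only the final `model_name`. A standalone sketch of the pitfall and the fix (function names here are illustrative):

```python
# Buggy: every lambda closes over the same loop variable cell,
# so all handlers report the last name after the loop finishes.
def make_handlers_buggy(names):
    return [lambda: name for name in names]

# Fixed: a factory function gives each lambda its own binding,
# the same trick the tray code uses with create_handler(model_name).
def make_handlers_fixed(names):
    def create_handler(captured):
        return lambda: captured
    return [create_handler(name) for name in names]

names = ["model-a", "model-b"]
print([h() for h in make_handlers_buggy(names)])  # ['model-b', 'model-b']
print([h() for h in make_handlers_fixed(names)])  # ['model-a', 'model-b']
```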
lemonade/version.py CHANGED
@@ -1 +1 @@
- __version__ = "8.0.2"
+ __version__ = "8.0.4"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lemonade-sdk
- Version: 8.0.2
+ Version: 8.0.4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
  Author-email: lemonade@amd.com
  Requires-Python: >=3.10, <3.12
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
  Requires-Dist: transformers<=4.51.3
  Requires-Dist: jinja2
  Requires-Dist: tabulate
- Requires-Dist: huggingface-hub==0.30.2
+ Requires-Dist: sentencepiece
+ Requires-Dist: huggingface-hub==0.33.0
+ Provides-Extra: oga-hybrid
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+ Provides-Extra: oga-cpu
+ Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
+ Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
+ Provides-Extra: dev
+ Requires-Dist: torch>=2.6.0; extra == "dev"
+ Requires-Dist: accelerate; extra == "dev"
+ Requires-Dist: datasets; extra == "dev"
+ Requires-Dist: pandas>=1.5.3; extra == "dev"
+ Requires-Dist: matplotlib; extra == "dev"
+ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
+ Requires-Dist: lm-eval[api]; extra == "dev"
  Provides-Extra: oga-hybrid-minimal
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
+ Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
  Provides-Extra: oga-cpu-minimal
- Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
- Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
+ Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
  Provides-Extra: llm
- Requires-Dist: torch>=2.6.0; extra == "llm"
- Requires-Dist: accelerate; extra == "llm"
- Requires-Dist: sentencepiece; extra == "llm"
- Requires-Dist: datasets; extra == "llm"
- Requires-Dist: pandas>=1.5.3; extra == "llm"
- Requires-Dist: matplotlib; extra == "llm"
- Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
- Requires-Dist: lm-eval[api]; extra == "llm"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm"
  Provides-Extra: llm-oga-cpu
- Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
+ Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
  Provides-Extra: llm-oga-igpu
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
  Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
  Provides-Extra: llm-oga-cuda
- Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
- Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
- Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
+ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
+ Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
+ Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
  Provides-Extra: llm-oga-npu
  Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
  Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
  Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
  Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
  Provides-Extra: llm-oga-hybrid
- Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
+ Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
  Provides-Extra: llm-oga-unified
  Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
  Dynamic: author-email
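Net effect of the metadata changes: `oga-hybrid`, `oga-cpu`, and `dev` become the base extras, while the previous names (`oga-hybrid-minimal`, `oga-cpu-minimal`, `llm`) remain as thin aliases onto them, so an existing `pip install lemonade-sdk[llm-oga-hybrid]` keeps working and now resolves to `lemonade-sdk[dev,oga-hybrid]`. The CUDA extra also moves to onnxruntime-genai 0.8.2 and onnxruntime-gpu 1.22+.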
@@ -78,7 +82,7 @@ Dynamic: summary
 
  [![Lemonade tests](https://github.com/lemonade-sdk/lemonade/actions/workflows/test_lemonade.yml/badge.svg)](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
  [![OS - Windows | Linux](https://img.shields.io/badge/OS-windows%20%7C%20linux-blue)](docs/README.md#installation "Check out our instructions")
- [![Made with Python](https://img.shields.io/badge/Python-3.8,3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
+ [![Made with Python](https://img.shields.io/badge/Python-3.10-blue?logo=python&logoColor=white)](docs/README.md#installation "Check out our instructions")
 
  ## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
 
@@ -93,8 +97,8 @@ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models
  The [Lemonade SDK](./docs/README.md) is comprised of the following:
 
  - 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
- - 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
- - 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
+ - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
+ - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
  - Prompting with templates.
  - Measuring accuracy with a variety of tests.
  - Benchmarking to get the time-to-first-token and tokens per second.
@@ -149,14 +153,7 @@ Maximum LLM performance requires the right hardware accelerator with the right i
  </tbody>
  </table>
 
-
-
- #### Inference Engines Overview
- | Engine | Description |
- | :--- | :--- |
- | **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
- | **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
- | **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
+ To learn more about the supported hardware and software, visit the documentation [here](./docs/README.md#software-and-hardware-overview).
 
  ## Integrate Lemonade Server with Your Application