lemonade-sdk 8.0.3__py3-none-any.whl → 8.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/tools/humaneval.py +1 -1
- lemonade/tools/mmlu.py +1 -1
- lemonade/tools/oga/load.py +1 -1
- lemonade/tools/perplexity.py +2 -2
- lemonade/tools/quark/quark_load.py +1 -1
- lemonade/tools/quark/quark_quantize.py +2 -2
- lemonade/tools/server/llamacpp.py +130 -9
- lemonade/tools/server/serve.py +73 -0
- lemonade/tools/server/static/styles.css +424 -4
- lemonade/tools/server/static/webapp.html +301 -35
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/METADATA +5 -12
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/RECORD +21 -21
- lemonade_server/model_manager.py +12 -2
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +46 -44
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.3.dist-info → lemonade_sdk-8.0.4.dist-info}/top_level.txt +0 -0
|
@@ -33,7 +33,47 @@
|
|
|
33
33
|
<input type="text" id="chat-input" placeholder="Type your message..." />
|
|
34
34
|
<button id="send-btn">Send</button>
|
|
35
35
|
</div>
|
|
36
|
-
</div>
|
|
36
|
+
</div>
|
|
37
|
+
<!-- App Suggestions Section -->
|
|
38
|
+
<div class="app-suggestions-section">
|
|
39
|
+
<div class="suggestion-text">
|
|
40
|
+
Use Lemonade with your favorite app
|
|
41
|
+
</div>
|
|
42
|
+
<div class="app-logos-grid">
|
|
43
|
+
<a href="https://lemonade-server.ai/docs/server/apps/open-webui/" target="_blank" class="app-logo-item" title="Open WebUI">
|
|
44
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/openwebui.jpg" alt="Open WebUI" class="app-logo-img">
|
|
45
|
+
<span class="app-name">Open WebUI</span>
|
|
46
|
+
</a>
|
|
47
|
+
<a href="https://lemonade-server.ai/docs/server/apps/continue/" target="_blank" class="app-logo-item" title="Continue">
|
|
48
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/continue_dev.png" alt="Continue" class="app-logo-img">
|
|
49
|
+
<span class="app-name">Continue</span>
|
|
50
|
+
</a>
|
|
51
|
+
<a href="https://github.com/amd/gaia" target="_blank" class="app-logo-item" title="Gaia">
|
|
52
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/gaia.ico" alt="Gaia" class="app-logo-img">
|
|
53
|
+
<span class="app-name">Gaia</span>
|
|
54
|
+
</a>
|
|
55
|
+
<a href="https://lemonade-server.ai/docs/server/apps/anythingLLM/" target="_blank" class="app-logo-item" title="AnythingLLM">
|
|
56
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/anything_llm.png" alt="AnythingLLM" class="app-logo-img">
|
|
57
|
+
<span class="app-name">AnythingLLM</span>
|
|
58
|
+
</a>
|
|
59
|
+
<a href="https://lemonade-server.ai/docs/server/apps/ai-dev-gallery/" target="_blank" class="app-logo-item" title="AI Dev Gallery">
|
|
60
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_dev_gallery.webp" alt="AI Dev Gallery" class="app-logo-img">
|
|
61
|
+
<span class="app-name">AI Dev Gallery</span>
|
|
62
|
+
</a>
|
|
63
|
+
<a href="https://lemonade-server.ai/docs/server/apps/lm-eval/" target="_blank" class="app-logo-item" title="LM-Eval">
|
|
64
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
|
|
65
|
+
<span class="app-name">LM-Eval</span>
|
|
66
|
+
</a>
|
|
67
|
+
<a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" target="_blank" class="app-logo-item" title="CodeGPT">
|
|
68
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" class="app-logo-img">
|
|
69
|
+
<span class="app-name">CodeGPT</span>
|
|
70
|
+
</a>
|
|
71
|
+
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
|
|
72
|
+
<img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
|
|
73
|
+
<span class="app-name">AI Toolkit</span>
|
|
74
|
+
</a>
|
|
75
|
+
</div>
|
|
76
|
+
</div>
|
|
37
77
|
</div>
|
|
38
78
|
<div class="tab-content" id="content-models"> <div class="model-mgmt-register-form collapsed"> <h3 class="model-mgmt-form-title" onclick="toggleAddModelForm()">
|
|
39
79
|
Add a Model
|
|
@@ -109,7 +149,66 @@
|
|
|
109
149
|
<div class="copyright">Copyright 2025 AMD</div>
|
|
110
150
|
</footer>
|
|
111
151
|
<script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
|
|
112
|
-
<script>
|
|
152
|
+
<script src="https://cdn.jsdelivr.net/npm/marked@9.1.0/marked.min.js"></script>
|
|
153
|
+
<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
|
|
154
|
+
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|
155
|
+
<script>
|
|
156
|
+
// Configure MathJax
|
|
157
|
+
window.MathJax = {
|
|
158
|
+
tex: {
|
|
159
|
+
inlineMath: [['\\(', '\\)'], ['$', '$']],
|
|
160
|
+
displayMath: [['\\[', '\\]'], ['$$', '$$']],
|
|
161
|
+
processEscapes: true,
|
|
162
|
+
processEnvironments: true
|
|
163
|
+
},
|
|
164
|
+
options: {
|
|
165
|
+
skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre']
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
</script>
|
|
169
|
+
<script>
|
|
170
|
+
// Configure marked.js for safe HTML rendering
|
|
171
|
+
marked.setOptions({
|
|
172
|
+
breaks: true,
|
|
173
|
+
gfm: true,
|
|
174
|
+
sanitize: false,
|
|
175
|
+
smartLists: true,
|
|
176
|
+
smartypants: true
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
// Function to unescape JSON strings
|
|
180
|
+
function unescapeJsonString(str) {
|
|
181
|
+
try {
|
|
182
|
+
return str.replace(/\\n/g, '\n')
|
|
183
|
+
.replace(/\\t/g, '\t')
|
|
184
|
+
.replace(/\\r/g, '\r')
|
|
185
|
+
.replace(/\\"/g, '"')
|
|
186
|
+
.replace(/\\\\/g, '\\');
|
|
187
|
+
} catch (error) {
|
|
188
|
+
console.error('Error unescaping string:', error);
|
|
189
|
+
return str;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Function to safely render markdown with MathJax support
|
|
194
|
+
function renderMarkdown(text) {
|
|
195
|
+
try {
|
|
196
|
+
const html = marked.parse(text);
|
|
197
|
+
// Trigger MathJax to process the new content
|
|
198
|
+
if (window.MathJax && window.MathJax.typesetPromise) {
|
|
199
|
+
// Use a timeout to ensure DOM is updated before typesetting
|
|
200
|
+
setTimeout(() => {
|
|
201
|
+
window.MathJax.typesetPromise();
|
|
202
|
+
}, 0);
|
|
203
|
+
}
|
|
204
|
+
return html;
|
|
205
|
+
} catch (error) {
|
|
206
|
+
console.error('Error rendering markdown:', error);
|
|
207
|
+
return text; // fallback to plain text
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Tab switching logic
|
|
113
212
|
function showTab(tab, updateHash = true) {
|
|
114
213
|
document.getElementById('tab-chat').classList.remove('active');
|
|
115
214
|
document.getElementById('tab-models').classList.remove('active');
|
|
@@ -163,6 +262,44 @@
|
|
|
163
262
|
form.classList.toggle('collapsed');
|
|
164
263
|
}
|
|
165
264
|
|
|
265
|
+
// Handle image load failures for app logos
|
|
266
|
+
function handleImageFailure(img) {
|
|
267
|
+
const logoItem = img.closest('.app-logo-item');
|
|
268
|
+
if (logoItem) {
|
|
269
|
+
logoItem.classList.add('image-failed');
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Set up image error handlers when DOM is loaded
|
|
274
|
+
document.addEventListener('DOMContentLoaded', function() {
|
|
275
|
+
const logoImages = document.querySelectorAll('.app-logo-img');
|
|
276
|
+
logoImages.forEach(function(img) {
|
|
277
|
+
let imageLoaded = false;
|
|
278
|
+
|
|
279
|
+
img.addEventListener('load', function() {
|
|
280
|
+
imageLoaded = true;
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
img.addEventListener('error', function() {
|
|
284
|
+
if (!imageLoaded) {
|
|
285
|
+
handleImageFailure(this);
|
|
286
|
+
}
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
// Also check if image is already broken (cached failure)
|
|
290
|
+
if (img.complete && img.naturalWidth === 0) {
|
|
291
|
+
handleImageFailure(img);
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
// Timeout fallback for slow connections (5 seconds)
|
|
295
|
+
setTimeout(function() {
|
|
296
|
+
if (!imageLoaded && !img.complete) {
|
|
297
|
+
handleImageFailure(img);
|
|
298
|
+
}
|
|
299
|
+
}, 5000);
|
|
300
|
+
});
|
|
301
|
+
});
|
|
302
|
+
|
|
166
303
|
// Helper to get server base URL
|
|
167
304
|
function getServerBaseUrl() {
|
|
168
305
|
const port = window.SERVER_PORT || 8000;
|
|
@@ -184,17 +321,37 @@
|
|
|
184
321
|
select.innerHTML = '<option>No models available</option>';
|
|
185
322
|
return;
|
|
186
323
|
}
|
|
324
|
+
|
|
325
|
+
// Filter out embedding models from chat interface
|
|
326
|
+
const allModels = window.SERVER_MODELS || {};
|
|
327
|
+
let filteredModels = [];
|
|
187
328
|
let defaultIndex = 0;
|
|
188
|
-
|
|
329
|
+
|
|
330
|
+
data.data.forEach(function(model) {
|
|
189
331
|
const modelId = model.id || model.name || model;
|
|
332
|
+
const modelInfo = allModels[modelId] || {};
|
|
333
|
+
const labels = modelInfo.labels || [];
|
|
334
|
+
|
|
335
|
+
// Skip models with "embeddings" or "reranking" label
|
|
336
|
+
if (labels.includes('embeddings') || labels.includes('reranking')) {
|
|
337
|
+
return;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
filteredModels.push(modelId);
|
|
190
341
|
const opt = document.createElement('option');
|
|
191
342
|
opt.value = modelId;
|
|
192
343
|
opt.textContent = modelId;
|
|
193
344
|
if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
|
|
194
|
-
defaultIndex =
|
|
345
|
+
defaultIndex = filteredModels.length - 1;
|
|
195
346
|
}
|
|
196
347
|
select.appendChild(opt);
|
|
197
348
|
});
|
|
349
|
+
|
|
350
|
+
if (filteredModels.length === 0) {
|
|
351
|
+
select.innerHTML = '<option>No chat models available</option>';
|
|
352
|
+
return;
|
|
353
|
+
}
|
|
354
|
+
|
|
198
355
|
select.selectedIndex = defaultIndex;
|
|
199
356
|
} catch (e) {
|
|
200
357
|
const select = document.getElementById('model-select');
|
|
@@ -217,26 +374,24 @@
|
|
|
217
374
|
|
|
218
375
|
// Add labels if they exist
|
|
219
376
|
const modelData = allModels[modelId];
|
|
220
|
-
if (modelData) {
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
const
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
});
|
|
239
|
-
}
|
|
377
|
+
if (modelData && modelData.labels && Array.isArray(modelData.labels)) {
|
|
378
|
+
modelData.labels.forEach(label => {
|
|
379
|
+
const labelSpan = document.createElement('span');
|
|
380
|
+
const labelLower = label.toLowerCase();
|
|
381
|
+
let labelClass = 'other';
|
|
382
|
+
if (labelLower === 'vision') {
|
|
383
|
+
labelClass = 'vision';
|
|
384
|
+
} else if (labelLower === 'embeddings') {
|
|
385
|
+
labelClass = 'embeddings';
|
|
386
|
+
} else if (labelLower === 'reasoning') {
|
|
387
|
+
labelClass = 'reasoning';
|
|
388
|
+
} else if (labelLower === 'reranking') {
|
|
389
|
+
labelClass = 'reranking';
|
|
390
|
+
}
|
|
391
|
+
labelSpan.className = `model-label ${labelClass}`;
|
|
392
|
+
labelSpan.textContent = label;
|
|
393
|
+
container.appendChild(labelSpan);
|
|
394
|
+
});
|
|
240
395
|
}
|
|
241
396
|
|
|
242
397
|
return container;
|
|
@@ -358,16 +513,110 @@
|
|
|
358
513
|
const modelSelect = document.getElementById('model-select');
|
|
359
514
|
let messages = [];
|
|
360
515
|
|
|
361
|
-
function appendMessage(role, text) {
|
|
516
|
+
function appendMessage(role, text, isMarkdown = false) {
|
|
362
517
|
const div = document.createElement('div');
|
|
363
518
|
div.className = 'chat-message ' + role;
|
|
364
519
|
// Add a bubble for iMessage style
|
|
365
520
|
const bubble = document.createElement('div');
|
|
366
521
|
bubble.className = 'chat-bubble ' + role;
|
|
367
|
-
|
|
522
|
+
|
|
523
|
+
if (role === 'llm' && isMarkdown) {
|
|
524
|
+
bubble.innerHTML = renderMarkdownWithThinkTokens(text);
|
|
525
|
+
} else {
|
|
526
|
+
bubble.textContent = text;
|
|
527
|
+
}
|
|
528
|
+
|
|
368
529
|
div.appendChild(bubble);
|
|
369
530
|
chatHistory.appendChild(div);
|
|
370
531
|
chatHistory.scrollTop = chatHistory.scrollHeight;
|
|
532
|
+
return bubble; // Return the bubble element for streaming updates
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
function updateMessageContent(bubbleElement, text, isMarkdown = false) {
|
|
536
|
+
if (isMarkdown) {
|
|
537
|
+
bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
|
|
538
|
+
} else {
|
|
539
|
+
bubbleElement.textContent = text;
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
function renderMarkdownWithThinkTokens(text) {
|
|
544
|
+
// Check if text contains opening think tag
|
|
545
|
+
if (text.includes('<think>')) {
|
|
546
|
+
if (text.includes('</think>')) {
|
|
547
|
+
// Complete think block - handle as before
|
|
548
|
+
const thinkMatch = text.match(/<think>(.*?)<\/think>/s);
|
|
549
|
+
if (thinkMatch) {
|
|
550
|
+
const thinkContent = thinkMatch[1].trim();
|
|
551
|
+
const mainResponse = text.replace(/<think>.*?<\/think>/s, '').trim();
|
|
552
|
+
|
|
553
|
+
// Create collapsible structure
|
|
554
|
+
let html = '';
|
|
555
|
+
if (thinkContent) {
|
|
556
|
+
html += `
|
|
557
|
+
<div class="think-tokens-container">
|
|
558
|
+
<div class="think-tokens-header" onclick="toggleThinkTokens(this)">
|
|
559
|
+
<span class="think-tokens-chevron">▼</span>
|
|
560
|
+
<span class="think-tokens-label">Thinking...</span>
|
|
561
|
+
</div>
|
|
562
|
+
<div class="think-tokens-content">
|
|
563
|
+
${renderMarkdown(thinkContent)}
|
|
564
|
+
</div>
|
|
565
|
+
</div>
|
|
566
|
+
`;
|
|
567
|
+
}
|
|
568
|
+
if (mainResponse) {
|
|
569
|
+
html += `<div class="main-response">${renderMarkdown(mainResponse)}</div>`;
|
|
570
|
+
}
|
|
571
|
+
return html;
|
|
572
|
+
}
|
|
573
|
+
} else {
|
|
574
|
+
// Partial think block - only opening tag found, still being generated
|
|
575
|
+
const thinkMatch = text.match(/<think>(.*)/s);
|
|
576
|
+
if (thinkMatch) {
|
|
577
|
+
const thinkContent = thinkMatch[1];
|
|
578
|
+
const beforeThink = text.substring(0, text.indexOf('<think>'));
|
|
579
|
+
|
|
580
|
+
let html = '';
|
|
581
|
+
if (beforeThink.trim()) {
|
|
582
|
+
html += `<div class="main-response">${renderMarkdown(beforeThink)}</div>`;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
html += `
|
|
586
|
+
<div class="think-tokens-container">
|
|
587
|
+
<div class="think-tokens-header" onclick="toggleThinkTokens(this)">
|
|
588
|
+
<span class="think-tokens-chevron">▼</span>
|
|
589
|
+
<span class="think-tokens-label">Thinking...</span>
|
|
590
|
+
</div>
|
|
591
|
+
<div class="think-tokens-content">
|
|
592
|
+
${renderMarkdown(thinkContent)}
|
|
593
|
+
</div>
|
|
594
|
+
</div>
|
|
595
|
+
`;
|
|
596
|
+
|
|
597
|
+
return html;
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// Fallback to normal markdown rendering
|
|
603
|
+
return renderMarkdown(text);
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
function toggleThinkTokens(header) {
|
|
607
|
+
const container = header.parentElement;
|
|
608
|
+
const content = container.querySelector('.think-tokens-content');
|
|
609
|
+
const chevron = header.querySelector('.think-tokens-chevron');
|
|
610
|
+
|
|
611
|
+
if (content.style.display === 'none') {
|
|
612
|
+
content.style.display = 'block';
|
|
613
|
+
chevron.textContent = '▼';
|
|
614
|
+
container.classList.remove('collapsed');
|
|
615
|
+
} else {
|
|
616
|
+
content.style.display = 'none';
|
|
617
|
+
chevron.textContent = '▶';
|
|
618
|
+
container.classList.add('collapsed');
|
|
619
|
+
}
|
|
371
620
|
}
|
|
372
621
|
|
|
373
622
|
async function sendMessage() {
|
|
@@ -379,8 +628,7 @@
|
|
|
379
628
|
sendBtn.disabled = true;
|
|
380
629
|
// Streaming OpenAI completions (placeholder, adapt as needed)
|
|
381
630
|
let llmText = '';
|
|
382
|
-
appendMessage('llm', '...');
|
|
383
|
-
const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
|
|
631
|
+
const llmBubble = appendMessage('llm', '...');
|
|
384
632
|
try {
|
|
385
633
|
// Use the correct endpoint for chat completions
|
|
386
634
|
const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
|
|
@@ -395,22 +643,40 @@
|
|
|
395
643
|
if (!resp.body) throw new Error('No stream');
|
|
396
644
|
const reader = resp.body.getReader();
|
|
397
645
|
let decoder = new TextDecoder();
|
|
398
|
-
|
|
646
|
+
llmBubble.textContent = '';
|
|
399
647
|
while (true) {
|
|
400
648
|
const { done, value } = await reader.read();
|
|
401
649
|
if (done) break;
|
|
402
650
|
const chunk = decoder.decode(value);
|
|
403
651
|
if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
652
|
+
|
|
653
|
+
// Handle Server-Sent Events format
|
|
654
|
+
const lines = chunk.split('\n');
|
|
655
|
+
for (const line of lines) {
|
|
656
|
+
if (line.startsWith('data: ')) {
|
|
657
|
+
const jsonStr = line.substring(6).trim();
|
|
658
|
+
if (jsonStr === '[DONE]') continue;
|
|
659
|
+
|
|
660
|
+
try {
|
|
661
|
+
const parsed = JSON.parse(jsonStr);
|
|
662
|
+
if (parsed.choices && parsed.choices[0] && parsed.choices[0].delta && parsed.choices[0].delta.content) {
|
|
663
|
+
llmText += parsed.choices[0].delta.content;
|
|
664
|
+
updateMessageContent(llmBubble, llmText, true);
|
|
665
|
+
}
|
|
666
|
+
} catch (e) {
|
|
667
|
+
// Fallback to regex parsing if JSON parsing fails
|
|
668
|
+
const match = jsonStr.match(/"content"\s*:\s*"((?:\\.|[^"\\])*)"/);
|
|
669
|
+
if (match && match[1]) {
|
|
670
|
+
llmText += unescapeJsonString(match[1]);
|
|
671
|
+
updateMessageContent(llmBubble, llmText, true);
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
409
675
|
}
|
|
410
676
|
}
|
|
411
677
|
messages.push({ role: 'assistant', content: llmText });
|
|
412
678
|
} catch (e) {
|
|
413
|
-
|
|
679
|
+
llmBubble.textContent = '[Error: ' + e.message + ']';
|
|
414
680
|
}
|
|
415
681
|
sendBtn.disabled = false;
|
|
416
682
|
}
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.3"
|
|
1
|
+
__version__ = "8.0.4"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.3
|
|
3
|
+
Version: 8.0.4
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.12
|
|
@@ -82,7 +82,7 @@ Dynamic: summary
|
|
|
82
82
|
|
|
83
83
|
[](https://github.com/lemonade-sdk/lemonade/tree/main/test "Check out our tests")
|
|
84
84
|
[](docs/README.md#installation "Check out our instructions")
|
|
85
|
-
[](docs/README.md#installation "Check out our instructions")
|
|
86
86
|
|
|
87
87
|
## 🍋 Lemonade SDK: Quickly serve, benchmark and deploy LLMs
|
|
88
88
|
|
|
@@ -97,8 +97,8 @@ The [Lemonade SDK](./docs/README.md) makes it easy to run Large Language Models
|
|
|
97
97
|
The [Lemonade SDK](./docs/README.md) is comprised of the following:
|
|
98
98
|
|
|
99
99
|
- 🌐 **[Lemonade Server](https://lemonade-server.ai/docs)**: A local LLM server for running ONNX and GGUF models using the OpenAI API standard. Install and enable your applications with NPU and GPU acceleration in minutes.
|
|
100
|
-
- 🐍 **Lemonade API**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
101
|
-
- 🖥️ **Lemonade CLI**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
|
|
100
|
+
- 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
|
|
101
|
+
- 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with measurement tools to characterize your models on your hardware. The available tools are:
|
|
102
102
|
- Prompting with templates.
|
|
103
103
|
- Measuring accuracy with a variety of tests.
|
|
104
104
|
- Benchmarking to get the time-to-first-token and tokens per second.
|
|
@@ -153,14 +153,7 @@ Maximum LLM performance requires the right hardware accelerator with the right i
|
|
|
153
153
|
</tbody>
|
|
154
154
|
</table>
|
|
155
155
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
#### Inference Engines Overview
|
|
159
|
-
| Engine | Description |
|
|
160
|
-
| :--- | :--- |
|
|
161
|
-
| **OnnxRuntime GenAI (OGA)** | Microsoft engine that runs `.onnx` models and enables hardware vendors to provide their own execution providers (EPs) to support specialized hardware, such as neural processing units (NPUs). |
|
|
162
|
-
| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
|
|
163
|
-
| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
|
|
156
|
+
To learn more about the supported hardware and software, visit the documentation [here](./docs/README.md#software-and-hardware-overview).
|
|
164
157
|
|
|
165
158
|
## Integrate Lemonade Server with Your Application
|
|
166
159
|
|
|
@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
|
|
|
4
4
|
lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
@@ -22,10 +22,10 @@ lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
|
|
|
22
22
|
lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
|
|
23
23
|
lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
|
|
24
24
|
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
25
|
-
lemonade/tools/humaneval.py,sha256=
|
|
25
|
+
lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
|
|
26
26
|
lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
|
|
27
|
-
lemonade/tools/mmlu.py,sha256=
|
|
28
|
-
lemonade/tools/perplexity.py,sha256=
|
|
27
|
+
lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
|
|
28
|
+
lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
|
|
29
29
|
lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
|
|
30
30
|
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
31
31
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
|
|
|
35
35
|
lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
|
|
36
36
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
|
|
38
|
-
lemonade/tools/oga/load.py,sha256=
|
|
38
|
+
lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
|
|
39
39
|
lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
|
|
40
40
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
-
lemonade/tools/quark/quark_load.py,sha256=
|
|
42
|
-
lemonade/tools/quark/quark_quantize.py,sha256=
|
|
41
|
+
lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
|
|
42
|
+
lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
|
|
43
43
|
lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
45
45
|
lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
|
|
46
46
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
48
|
-
lemonade/tools/server/serve.py,sha256=
|
|
47
|
+
lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
|
|
48
|
+
lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
|
|
49
49
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
50
50
|
lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
|
|
51
51
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
52
52
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
53
|
-
lemonade/tools/server/static/styles.css,sha256=
|
|
54
|
-
lemonade/tools/server/static/webapp.html,sha256=
|
|
53
|
+
lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
|
|
54
|
+
lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
|
|
55
55
|
lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
|
|
56
56
|
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
57
57
|
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
58
58
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
59
59
|
lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
|
|
60
|
-
lemonade_sdk-8.0.
|
|
61
|
-
lemonade_sdk-8.0.
|
|
60
|
+
lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
61
|
+
lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
62
62
|
lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
|
|
63
|
-
lemonade_server/model_manager.py,sha256=
|
|
64
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
65
|
-
lemonade_server/server_models.json,sha256=
|
|
66
|
-
lemonade_sdk-8.0.
|
|
67
|
-
lemonade_sdk-8.0.
|
|
68
|
-
lemonade_sdk-8.0.
|
|
69
|
-
lemonade_sdk-8.0.
|
|
70
|
-
lemonade_sdk-8.0.
|
|
63
|
+
lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
|
|
64
|
+
lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
|
|
65
|
+
lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
|
|
66
|
+
lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
|
|
67
|
+
lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
68
|
+
lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
69
|
+
lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
70
|
+
lemonade_sdk-8.0.4.dist-info/RECORD,,
|
lemonade_server/model_manager.py
CHANGED
|
@@ -54,6 +54,17 @@ class ModelManager:
|
|
|
54
54
|
for model_name, model_info in user_models.items()
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
# Backwards compatibility for user models that were created before version 8.0.4
|
|
58
|
+
# "reasoning" was a boolean, but as of 8.0.4 it became a label
|
|
59
|
+
for _, model_info in user_models.items():
|
|
60
|
+
if "reasoning" in model_info:
|
|
61
|
+
model_info["labels"] = (
|
|
62
|
+
["reasoning"]
|
|
63
|
+
if not model_info["labels"]
|
|
64
|
+
else model_info["labels"] + ["reasoning"]
|
|
65
|
+
)
|
|
66
|
+
del model_info["reasoning"]
|
|
67
|
+
|
|
57
68
|
models.update(user_models)
|
|
58
69
|
|
|
59
70
|
# Add the model name as a key in each entry, to make it easier
|
|
@@ -268,9 +279,8 @@ class ModelManager:
|
|
|
268
279
|
new_user_model = {
|
|
269
280
|
"checkpoint": checkpoint,
|
|
270
281
|
"recipe": recipe,
|
|
271
|
-
"reasoning": reasoning,
|
|
272
282
|
"suggested": True,
|
|
273
|
-
"labels": ["custom"],
|
|
283
|
+
"labels": ["custom"] + (["reasoning"] if reasoning else []),
|
|
274
284
|
}
|
|
275
285
|
|
|
276
286
|
if mmproj:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, Union, List, Any
|
|
2
2
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
|
|
|
65
65
|
response_format: dict | None = None
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
class EmbeddingsRequest(BaseModel):
|
|
69
|
+
"""
|
|
70
|
+
Request model for embeddings API endpoint.
|
|
71
|
+
|
|
72
|
+
Generates embeddings for the provided input text or tokens.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
input: Union[str, List]
|
|
76
|
+
model: Optional[str] = None
|
|
77
|
+
encoding_format: Optional[str] = "float" # "float" or "base64"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class RerankingRequest(BaseModel):
|
|
81
|
+
"""
|
|
82
|
+
Request model for reranking API endpoint.
|
|
83
|
+
|
|
84
|
+
Reranks a list of documents based on their relevance to a query.
|
|
85
|
+
"""
|
|
86
|
+
|
|
87
|
+
query: str
|
|
88
|
+
documents: List[str]
|
|
89
|
+
model: str
|
|
90
|
+
|
|
91
|
+
|
|
68
92
|
class ResponsesRequest(BaseModel):
|
|
69
93
|
"""
|
|
70
94
|
Request model for responses API endpoint.
|