lemonade-sdk 8.1.6-py3-none-any.whl → 8.1.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/cli.py +0 -4
- lemonade/tools/report/table.py +1 -1
- lemonade/tools/server/llamacpp.py +16 -1
- lemonade/tools/server/serve.py +39 -9
- lemonade/tools/server/static/js/chat.js +79 -15
- lemonade/tools/server/static/js/models.js +11 -0
- lemonade/tools/server/static/js/shared.js +11 -3
- lemonade/tools/server/static/styles.css +31 -7
- lemonade/tools/server/static/webapp.html +4 -4
- lemonade/tools/server/wrapped_server.py +8 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +15 -49
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/METADATA +15 -63
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/RECORD +21 -24
- lemonade_server/cli.py +12 -9
- lemonade_server/model_manager.py +10 -9
- lemonade/tools/quark/__init__.py +0 -0
- lemonade/tools/quark/quark_load.py +0 -173
- lemonade/tools/quark/quark_quantize.py +0 -439
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.6.dist-info → lemonade_sdk-8.1.8.dist-info}/top_level.txt +0 -0
lemonade/cli.py
CHANGED
@@ -25,8 +25,6 @@ from lemonade.tools.humaneval import AccuracyHumaneval
 from lemonade.tools.perplexity import AccuracyPerplexity
 from lemonade.tools.accuracy import LMEvalHarness
 from lemonade.tools.prompt import LLMPrompt
-from lemonade.tools.quark.quark_load import QuarkLoad
-from lemonade.tools.quark.quark_quantize import QuarkQuantize
 from lemonade.tools.report.llm_report import LemonadeReport
 
 
@@ -45,8 +43,6 @@ def main():
         HuggingfaceBench,
         OgaLoad,
         OgaBench,
-        QuarkQuantize,
-        QuarkLoad,
         LemonadeReport,
         # Inherited from lemonade
         Cache,
lemonade/tools/report/table.py
CHANGED
@@ -591,7 +591,7 @@ class LemonadePerfTable(Table):
             _wrap("Total Generated Tokens", 9),
             Keys.RESPONSE_TOKENS,
             "d",
-            stat_fn=sum,
+            stat_fn=lambda x: sum(_to_list(x)),
         ),
         SimpleStat(
             _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
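Note on the stat_fn change: the previous stat_fn=sum breaks when a report cell holds a list of per-iteration token counts rather than a single number, so the new lambda normalizes the value first. A minimal sketch of the idea; the _to_list shown here is a guess at the package's helper, not its actual implementation:

# Hypothetical stand-in for lemonade's _to_list helper: flatten the raw
# stat into a flat list of numbers so sum() never sees a nested value.
def _to_list(value):
    if isinstance(value, (list, tuple)):
        flat = []
        for v in value:
            flat.extend(_to_list(v))
        return flat
    return [value]

stat_fn = lambda x: sum(_to_list(x))
print(stat_fn(128))              # 128
print(stat_fn([128, 130, 127]))  # 385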
lemonade/tools/server/llamacpp.py
CHANGED
@@ -6,6 +6,7 @@ import threading
 import platform
 
 from dotenv import load_dotenv
+from fastapi import HTTPException, status
 
 from lemonade_server.pydantic_models import (
     PullConfig,
@@ -28,6 +29,20 @@ class LlamaTelemetry(WrappedServerTelemetry):
         Parse telemetry data from llama server output lines.
         """
 
+        if "vk::PhysicalDevice::createDevice: ErrorExtensionNotPresent" in line:
+            msg = (
+                "Your AMD GPU driver version is not compatible with this software.\n"
+                "Please update and try again: "
+                "https://www.amd.com/en/support/download/drivers.html"
+            )
+            logging.error(msg)
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=msg,
+            )
+        elif "error" in line.lower():
+            logging.error(line)
+
         # Parse Vulkan device detection
         vulkan_match = re.search(r"ggml_vulkan: Found (\d+) Vulkan devices?:", line)
         if vulkan_match:
@@ -182,7 +197,7 @@ class LlamaServer(WrappedServer):
         exe_dir = os.path.dirname(exe_path)
         env_file_path = os.path.join(exe_dir, ".env")
         if os.path.exists(env_file_path):
-            load_dotenv(env_file_path, override=True)
+            load_dotenv(env_file_path, override=False)
             env.update(os.environ)
             logging.debug(f"Loaded environment variables from {env_file_path}")
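The telemetry change above promotes one known-fatal llama.cpp/Vulkan log line into an HTTP 500 with a user-actionable message, while other error lines are merely logged. A standalone sketch of that pattern, assuming only that FastAPI is installed (the marker string and message text are taken from the diff):

import logging

from fastapi import HTTPException, status

FATAL_MARKER = "vk::PhysicalDevice::createDevice: ErrorExtensionNotPresent"

def parse_server_line(line: str) -> None:
    # One specific driver failure gets a friendly, actionable HTTP error...
    if FATAL_MARKER in line:
        msg = (
            "Your AMD GPU driver version is not compatible with this software.\n"
            "Please update and try again: "
            "https://www.amd.com/en/support/download/drivers.html"
        )
        logging.error(msg)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg
        )
    # ...while any other error line is logged but not fatal.
    if "error" in line.lower():
        logging.error(line)

Since this parser runs on the subprocess log-reader thread, the exception cannot reach the request handler directly; the wrapped_server.py change further down stores it and re-raises it on the waiting thread.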
lemonade/tools/server/serve.py
CHANGED
@@ -133,6 +133,21 @@ class StopOnEvent:
         return self.stop_event.is_set()
 
 
+class NoCacheStaticFiles(StaticFiles):
+    """Custom StaticFiles class with no-cache headers"""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def file_response(self, *args, **kwargs) -> Response:
+        response = super().file_response(*args, **kwargs)
+        # Add no-cache headers for all static files
+        response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+        response.headers["Pragma"] = "no-cache"
+        response.headers["Expires"] = "0"
+        return response
+
+
 class Server:
     """
     Open a web server that apps can use to communicate with the LLM.
@@ -198,7 +213,7 @@ class Server:
         # as the Web App
         static_dir = Path(__file__).parent / "static"
         self.app.mount(
-            "/static", StaticFiles(directory=static_dir), name="static_assets"
+            "/static", NoCacheStaticFiles(directory=static_dir), name="static_assets"
         )
 
         # Performance stats that are set during /ws and can be
@@ -1145,18 +1160,33 @@
             )
             self.input_tokens = len(input_ids[0])
 
-
-
-
-
-
+            max_prompt_length = self.ctx_size  # Default fallback
+            # For OGA models, try to read the actual max prompt length from config
+            if "oga-" in self.llm_loaded.recipe:
+                try:
+                    if model.config and model.config.get("max_prompt_length"):
+                        max_prompt_length = model.config["max_prompt_length"]
+                        logging.debug(
+                            f"Using OGA model max_prompt_length: {max_prompt_length}"
+                        )
+                # pylint: disable=broad-exception-caught
+                except Exception as e:
+                    logging.debug(f"Could not read OGA model config, using ctx_size: {e}")
 
+            # Apply truncation if input exceeds the limit
+            if self.input_tokens > max_prompt_length:
+                # Truncate input ids
+                truncate_amount = self.input_tokens - max_prompt_length
+                input_ids = input_ids[:max_prompt_length]
                 # Update token count
-                self.
+                if "oga-" in self.llm_loaded.recipe:
+                    self.input_tokens = len(input_ids)
+                else:
+                    self.input_tokens = len(input_ids[0])
 
-                #
+                # Log warning message instead of raising exception
                 truncation_message = (
-                    f"Input exceeded {
+                    f"Input exceeded {max_prompt_length} tokens. "
                     f"Truncated {truncate_amount} tokens from the beginning."
                 )
                 logging.warning(truncation_message)
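The serve.py truncation rework prefers an OGA model's own max_prompt_length (when its config provides one) over the generic context size, then clamps the prompt and warns instead of raising. A rough sketch of that decision with simplified, illustrative names (recipe, config, ctx_size mirror the fields used in the diff):

import logging

def resolve_max_prompt_length(recipe: str, config: dict, ctx_size: int) -> int:
    # OGA models may carry a tighter per-model limit in their config;
    # everything else falls back to the server's context size.
    if "oga-" in recipe and config.get("max_prompt_length"):
        return config["max_prompt_length"]
    return ctx_size

def clamp_prompt(input_ids: list, limit: int) -> list:
    # Truncate rather than raise, and warn so the event is visible in logs.
    if len(input_ids) > limit:
        logging.warning(
            "Input exceeded %d tokens. Truncated %d tokens.",
            limit,
            len(input_ids) - limit,
        )
        return input_ids[:limit]
    return input_ids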
lemonade/tools/server/static/js/chat.js
CHANGED
@@ -1,6 +1,7 @@
 // Chat logic and functionality
 let messages = [];
 let attachedFiles = [];
+let systemMessageElement = null;
 
 // Default model configuration
 const DEFAULT_MODEL = 'Qwen2.5-0.5B-Instruct-CPU';
@@ -28,6 +29,9 @@ document.addEventListener('DOMContentLoaded', function() {
     // Update attachment button state periodically
     updateAttachmentButtonState();
     setInterval(updateAttachmentButtonState, 1000);
+
+    // Display initial system message
+    displaySystemMessage();
 });
 
 function setupChatEventListeners() {
@@ -163,25 +167,30 @@ function updateAttachmentButtonState() {
         attachmentBtn.style.opacity = '0.5';
         attachmentBtn.style.cursor = 'not-allowed';
         attachmentBtn.title = 'Load a model first';
-        return;
-    }
-
-    const isVision = isVisionModel(currentLoadedModel);
-
-    if (isVision) {
-        attachmentBtn.style.opacity = '1';
-        attachmentBtn.style.cursor = 'pointer';
-        attachmentBtn.title = 'Attach images';
     } else {
-
-
-
+        const isVision = isVisionModel(currentLoadedModel);
+
+        if (isVision) {
+            attachmentBtn.style.opacity = '1';
+            attachmentBtn.style.cursor = 'pointer';
+            attachmentBtn.title = 'Attach images';
+        } else {
+            attachmentBtn.style.opacity = '0.5';
+            attachmentBtn.style.cursor = 'not-allowed';
+            attachmentBtn.title = 'Image attachments not supported by this model';
+        }
     }
+
+    // Update system message when model state changes
+    displaySystemMessage();
 }
 
 // Make updateAttachmentButtonState accessible globally
 window.updateAttachmentButtonState = updateAttachmentButtonState;
 
+// Make displaySystemMessage accessible globally
+window.displaySystemMessage = displaySystemMessage;
+
 // Auto-load default model and send message
 async function autoLoadDefaultModelAndSend() {
     // Check if default model is available and installed
@@ -217,6 +226,7 @@ async function autoLoadDefaultModelAndSend() {
         },
         onError: (error, failedModelId) => {
             console.error('Error auto-loading default model:', error);
+            showErrorBanner('Failed to load model: ' + error.message);
         }
     });
 }
@@ -277,7 +287,7 @@ function handleChatInputKeydown(e) {
     if (e.key === 'Escape' && attachedFiles.length > 0) {
         e.preventDefault();
         clearAttachments();
-    } else if (e.key === 'Enter') {
+    } else if (e.key === 'Enter' && !e.shiftKey) {
         // Check if we have a loaded model
         if (currentLoadedModel && modelSelect.value !== '' && !modelSelect.disabled) {
             sendMessage();
@@ -438,7 +448,8 @@ function appendMessage(role, text, isMarkdown = false) {
     const bubble = document.createElement('div');
     bubble.className = 'chat-bubble ' + role;
 
-    if
+    // Check if isMarkdown is true, regardless of role
+    if (isMarkdown) {
         bubble.innerHTML = renderMarkdownWithThinkTokens(text);
     } else {
         bubble.textContent = text;
@@ -450,6 +461,53 @@ function appendMessage(role, text, isMarkdown = false) {
     return bubble; // Return the bubble element for streaming updates
 }
 
+// Display system message based on current state
+function displaySystemMessage() {
+    // Remove existing system message if it exists
+    if (systemMessageElement) {
+        systemMessageElement.remove();
+        systemMessageElement = null;
+    }
+
+    // Don't show system message if there are already user/LLM messages
+    if (messages.length > 0) {
+        return;
+    }
+
+    let messageText = '';
+
+    // Check if any models are installed
+    const hasInstalledModels = window.installedModels && window.installedModels.size > 0;
+
+    if (!hasInstalledModels) {
+        // No models installed - show first message
+        messageText = `Welcome to Lemonade! To get started:
+1. Head over to the Model Management tab.
+2. Use the 📥Download button to download a model.
+3. Use the 🚀Load button to load the model.
+4. Come back to this tab, and you are ready to chat with the model.`;
+    } else if (!currentLoadedModel) {
+        // Models available but none loaded - show second message
+        messageText = 'Welcome to Lemonade! Choose a model from the dropdown menu below to load it and start chatting.';
+    }
+
+    if (messageText) {
+        const div = document.createElement('div');
+        div.className = 'chat-message system';
+        div.setAttribute('data-system-message', 'true');
+
+        const bubble = document.createElement('div');
+        bubble.className = 'chat-bubble system';
+        bubble.textContent = messageText;
+
+        div.appendChild(bubble);
+        chatHistory.appendChild(div);
+        chatHistory.scrollTop = chatHistory.scrollHeight;
+
+        systemMessageElement = div;
+    }
+}
+
 function updateMessageContent(bubbleElement, text, isMarkdown = false) {
     if (isMarkdown) {
         bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
@@ -541,6 +599,12 @@ async function sendMessage() {
     const text = chatInput.value.trim();
     if (!text && attachedFiles.length === 0) return;
 
+    // Remove system message when user starts chatting
+    if (systemMessageElement) {
+        systemMessageElement.remove();
+        systemMessageElement = null;
+    }
+
     // Check if a model is loaded, if not, automatically load the default model
     if (!currentLoadedModel) {
         const allModels = window.SERVER_MODELS || {};
@@ -624,7 +688,7 @@ async function sendMessage() {
         displayText = displayText ? `${displayText}\n[Images: ${fileNames}]` : `[Images: ${fileNames}]`;
     }
 
-    appendMessage('user', displayText);
+    appendMessage('user', displayText, true);
 
     // Add to messages array
     const userMessage = {
lemonade/tools/server/static/js/models.js
CHANGED
@@ -59,6 +59,11 @@ async function updateModelStatusIndicator() {
         window.initializeModelDropdown();
     }
 
+    // Update system message when model status changes
+    if (window.displaySystemMessage) {
+        window.displaySystemMessage();
+    }
+
     // Refresh model management UI if we're on the models tab
     const modelsTab = document.getElementById('content-models');
     if (modelsTab && modelsTab.classList.contains('active')) {
@@ -417,6 +422,7 @@ async function loadModel(modelId) {
         },
         onError: (error, failedModelId) => {
            console.error(`Failed to load model ${failedModelId}:`, error);
+           showErrorBanner('Failed to load model: ' + error.message);
        }
    });
}
@@ -699,6 +705,11 @@ async function refreshModelMgmtUI() {
    if (window.initializeModelDropdown) {
        window.initializeModelDropdown();
    }
+
+   // Update system message when installed models change
+   if (window.displaySystemMessage) {
+       window.displaySystemMessage();
+   }
}

// Make refreshModelMgmtUI globally accessible
lemonade/tools/server/static/js/shared.js
CHANGED
@@ -54,6 +54,14 @@ function renderMarkdown(text) {
 
 // Display an error message in the banner
 function showErrorBanner(msg) {
+    // If DOM isn't ready, wait for it
+    if (document.readyState === 'loading') {
+        document.addEventListener('DOMContentLoaded', () => {
+            showErrorBanner(msg);
+        });
+        return;
+    }
+
     const banner = document.getElementById('error-banner');
     if (!banner) return;
     const msgEl = document.getElementById('error-banner-msg');
@@ -303,12 +311,12 @@ async function loadModelStandardized(modelId, options = {}) {
         onLoadingEnd(modelId, false);
     }
 
-    // Call error callback
+    // Call error callback and always show default error banner as fallback
     if (onError) {
         onError(error, modelId);
-    } else {
-        showErrorBanner('Failed to load model: ' + error.message);
     }
+    // Always show error banner to ensure user sees the error
+    showErrorBanner('Failed to load model: ' + error.message);
 
     return false;
 }
lemonade/tools/server/static/styles.css
CHANGED
@@ -157,9 +157,8 @@ body::before {
     margin-bottom: 2em;
     border-radius: 8px;
     border: 1px solid #e0e0e0;
-    max-width: 1000px;
     min-width: 320px;
-    width:
+    width: 100%;
     margin-left: 1rem;
     margin-right: 1rem;
 }
@@ -327,15 +326,16 @@ body::before {
 .chat-container {
     display: flex;
     flex-direction: column;
-    height: calc(100vh - 650px); /* Subtract space for navbar, title, wall of logos, etc */
     min-height: 300px;
-
-    max-width:
+    min-width: 300px;
+    max-width: 100%;
     width: 100%;
     margin: 0 auto;
     border: 1px solid #e0e0e0;
     border-radius: 8px;
     background: #fff;
+    resize: both;
+    overflow: auto;
 }
 
 .chat-history {
@@ -388,6 +388,21 @@ body::before {
     align-self: flex-start;
 }
 
+.chat-message.system {
+    align-items: flex-start;
+}
+
+.chat-bubble.system {
+    background: linear-gradient(135deg, #f0f8f0 0%, #e8f5e8 100%);
+    color: #2d7f47;
+    border-bottom-left-radius: 4px;
+    align-self: flex-start;
+    border: 1px solid #c8e6c9;
+    font-style: normal;
+    font-weight: 500;
+    box-shadow: 0 1px 3px rgba(45, 127, 71, 0.1);
+}
+
 /* Markdown styling within chat bubbles */
 .chat-bubble h1,
 .chat-bubble h2,
@@ -570,7 +585,7 @@ body::before {
     align-items: center;
 }
 
-
+#chat-input {
     flex: 1;
     padding: 0.5em;
     border: 1px solid #ddd;
@@ -578,6 +593,16 @@ body::before {
     background: #fff;
     color: #222;
     margin: 0;
+    resize: vertical;
+    min-height: 40px;
+    font-family: inherit;
+}
+
+/* Update placeholder style */
+#chat-input::placeholder {
+    color: #aaa;
+    opacity: 1;
+    font-style: italic;
 }
 
 #attachment-indicator {
@@ -1897,7 +1922,6 @@ body::before {
 
 .category-content {
     display: none;
-    padding: 0;
 }
 
 .category-content.expanded {
lemonade/tools/server/static/webapp.html
CHANGED
@@ -52,7 +52,7 @@
     <option value="">Pick a model</option>
 </select>
 <div class="input-with-indicator">
-    <
+    <textarea id="chat-input" placeholder="Type your message..." rows="1"></textarea>
 </div>
 <input type="file" id="file-attachment" style="display: none;" multiple accept="image/*">
 <button id="attachment-btn" title="Attach files">📎</button>
@@ -92,9 +92,9 @@
     <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
     <span class="app-name">LM-Eval</span>
 </a>
-<a href="https://lemonade-
-    <img src="https://raw.githubusercontent.com/lemonade-sdk/
-    <span class="app-name">
+<a href="https://github.com/lemonade-sdk/lemonade-arcade" target="_blank" class="app-logo-item" title="Lemonade Arcade">
+    <img src="https://raw.githubusercontent.com/lemonade-sdk/lemonade-arcade/refs/heads/main/docs/assets/favicon.ico" alt="Lemonade Arcade" class="app-logo-img">
+    <span class="app-name">Lemonade Arcade</span>
 </a>
 <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
     <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
lemonade/tools/server/wrapped_server.py
CHANGED
@@ -81,6 +81,7 @@ class WrappedServer(ABC):
         self.process: subprocess.Popen = None
         self.server_name: str = server_name
         self.telemetry: WrappedServerTelemetry = telemetry
+        self.log_thread_exception = None
 
     def choose_port(self):
         """
@@ -192,6 +193,8 @@ class WrappedServer(ABC):
 
             if self.process.poll() is not None:
                 break
+        except HTTPException as e:
+            self.log_thread_exception = e
         except UnicodeDecodeError as e:
             logging.debug(
                 "Unicode decode error reading subprocess output: %s", str(e)
@@ -217,6 +220,11 @@ class WrappedServer(ABC):
             )
             time.sleep(1)
 
+        if self.log_thread_exception:
+            e = self.log_thread_exception
+            self.log_thread_exception = None
+            raise e
+
     @abstractmethod
     def _launch_server_subprocess(
         self,
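The new log_thread_exception field is the classic cross-thread error handoff: an exception raised on a background log-reader thread cannot propagate to the caller, so it is stashed and re-raised on the thread that waits for server readiness. A self-contained sketch of the pattern (illustrative names, not the real class):

import threading

class BackgroundWorker:
    def __init__(self):
        self._exc = None

    def _reader(self):
        try:
            raise RuntimeError("fatal line seen in subprocess output")
        except RuntimeError as e:
            # Exceptions can't cross threads: stash for the waiter instead.
            self._exc = e

    def wait_ready(self):
        t = threading.Thread(target=self._reader)
        t.start()
        t.join()
        # Re-raise on the calling thread, clearing so it fires only once.
        if self._exc:
            e, self._exc = self._exc, None
            raise e

try:
    BackgroundWorker().wait_ready()
except RuntimeError as err:
    print(f"re-raised on the main thread: {err}")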
lemonade/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "8.1.6"
+__version__ = "8.1.8"
lemonade_install/install.py
CHANGED
@@ -48,10 +48,6 @@ NPU_DRIVER_DOWNLOAD_URL = (
 REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
 
 lemonade_install_dir = Path(__file__).parent.parent.parent
-DEFAULT_QUARK_VERSION = "quark-0.6.0"
-DEFAULT_QUARK_DIR = os.path.join(
-    lemonade_install_dir, "install", "quark", DEFAULT_QUARK_VERSION
-)
 
 # List of supported Ryzen AI processor series (can be extended in the future)
 SUPPORTED_RYZEN_AI_SERIES = ["300"]
@@ -177,7 +173,7 @@ def _get_ryzenai_version_info(device=None):
             f"{e}\n Please install lemonade-sdk with "
             "one of the oga extras, for example:\n"
             "pip install lemonade-sdk[dev,oga-cpu]\n"
-            "See https://
+            "See https://lemonade-server.ai/install_options.html for details"
         ) from e
 
 
@@ -445,18 +441,18 @@ class Install:
             "variable (e.g., Ryzen AI uses environment variable OGA_TOKEN).",
         )
 
-        parser.add_argument(
-            "--quark",
-            help="Install Quark Quantization tool for LLMs",
-            choices=["0.6.0"],
-        )
-
         parser.add_argument(
             "--llamacpp",
             help="Install llama.cpp binaries with specified backend",
             choices=["rocm", "vulkan"],
         )
 
+        parser.add_argument(
+            "--override",
+            action="store_true",
+            help="Override the deprecation error to use legacy tools.",
+        )
+
         return parser
 
     @staticmethod
@@ -637,7 +633,7 @@ class Install:
         return file
 
     @staticmethod
-    def _install_ryzenai(ryzenai, build_model, yes, token):
+    def _install_ryzenai(ryzenai, build_model, yes, token, override=False):
         # Check if the processor is supported before proceeding
         check_ryzen_ai_processor()
 
@@ -654,6 +650,9 @@ class Install:
             + "=" * 80
             + "\n"
         )
+        if not override:
+            raise ValueError(warning_msg)
+
         print(warning_msg)
 
         # Delete any previous Ryzen AI installation in this environment
@@ -715,36 +714,6 @@ class Install:
         except IOError as e:
             print(f"An error occurred while writing {version_info_path}: {e}")
 
-    @staticmethod
-    def _install_quark(quark):
-        quark_install_dir = os.path.join(lemonade_install_dir, "install", "quark")
-        os.makedirs(quark_install_dir, exist_ok=True)
-
-        # Install Quark utilities
-        quark_url = (
-            f"https://www.xilinx.com/bin/public/openDownload?filename=quark-{quark}.zip"
-        )
-        quark_path = download_and_extract_package(
-            url=quark_url,
-            version=quark,
-            install_dir=quark_install_dir,
-            package_name="quark",
-        )
-        # Install Quark wheel
-        wheel_url = (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            f"filename=quark-{quark}-py3-none-any.whl"
-        )
-        wheel_path = os.path.join(quark_install_dir, f"quark-{quark}-py3-none-any.whl")
-        print(f"\nInstalling Quark wheel from {wheel_url}")
-        download_file(wheel_url, wheel_path, "wheel file")
-
-        install_cmd = f"{sys.executable} -m pip install --no-deps {wheel_path}"
-        subprocess.run(install_cmd, check=True, shell=True)
-        os.remove(wheel_path)
-
-        print(f"\nQuark installed successfully at: {quark_path}")
-
     @staticmethod
     def _install_llamacpp(backend):
         """
@@ -762,22 +731,19 @@ class Install:
         self,
         ryzenai: Optional[str] = None,
         build_model: Optional[str] = None,
-        quark: Optional[str] = None,
         llamacpp: Optional[str] = None,
         yes: bool = False,
         token: Optional[str] = None,
+        override: bool = False,
     ):
-        if ryzenai is None and
+        if ryzenai is None and llamacpp is None:
             raise ValueError(
                 "You must select something to install, "
-                "for example `--ryzenai
+                "for example `--ryzenai` or `--llamacpp`"
             )
 
         if ryzenai is not None:
-            self._install_ryzenai(ryzenai, build_model, yes, token)
-
-        if quark is not None:
-            self._install_quark(quark)
+            self._install_ryzenai(ryzenai, build_model, yes, token, override)
 
         if llamacpp is not None:
            self._install_llamacpp(llamacpp)