lemonade_sdk-8.1.7-py3-none-any.whl → lemonade_sdk-8.1.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (29) hide show
  1. lemonade/cli.py +47 -5
  2. lemonade/profilers/agt_power.py +437 -0
  3. lemonade/profilers/hwinfo_power.py +429 -0
  4. lemonade/tools/llamacpp/utils.py +15 -4
  5. lemonade/tools/oga/load.py +15 -2
  6. lemonade/tools/report/table.py +1 -1
  7. lemonade/tools/server/llamacpp.py +19 -13
  8. lemonade/tools/server/serve.py +39 -9
  9. lemonade/tools/server/static/js/chat.js +545 -242
  10. lemonade/tools/server/static/js/models.js +112 -24
  11. lemonade/tools/server/static/js/shared.js +15 -5
  12. lemonade/tools/server/static/styles.css +145 -75
  13. lemonade/tools/server/static/webapp.html +23 -27
  14. lemonade/tools/server/wrapped_server.py +8 -0
  15. lemonade/version.py +1 -1
  16. lemonade_install/install.py +15 -49
  17. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/METADATA +16 -64
  18. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/RECORD +26 -27
  19. lemonade_server/cli.py +12 -9
  20. lemonade_server/model_manager.py +48 -0
  21. lemonade_server/server_models.json +24 -6
  22. lemonade/tools/quark/__init__.py +0 -0
  23. lemonade/tools/quark/quark_load.py +0 -173
  24. lemonade/tools/quark/quark_quantize.py +0 -439
  25. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/WHEEL +0 -0
  26. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/entry_points.txt +0 -0
  27. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/licenses/LICENSE +0 -0
  28. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/licenses/NOTICE.md +0 -0
  29. {lemonade_sdk-8.1.7.dist-info → lemonade_sdk-8.1.9.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,18 @@
1
1
  // Chat logic and functionality
2
2
  let messages = [];
3
3
  let attachedFiles = [];
4
+ let systemMessageElement = null;
4
5
 
5
6
  // Default model configuration
6
7
  const DEFAULT_MODEL = 'Qwen2.5-0.5B-Instruct-CPU';
7
8
 
9
+ const THINKING_ANIM_INTERVAL_MS = 550;
10
+ // Toggle this to false if you prefer plain dots only.
11
+ const THINKING_USE_LEMON = true;
12
+ const THINKING_FRAMES = THINKING_USE_LEMON
13
+ ? ['Thinking.','Thinking..','Thinking...','Thinking 🍋']
14
+ : ['Thinking.','Thinking..','Thinking...'];
15
+
8
16
  // Get DOM elements
9
17
  let chatHistory, chatInput, sendBtn, attachmentBtn, fileAttachment, attachmentsPreviewContainer, attachmentsPreviewRow, modelSelect;
10
18
 
@@ -21,19 +29,22 @@ document.addEventListener('DOMContentLoaded', function() {
21
29
 
22
30
  // Set up event listeners
23
31
  setupChatEventListeners();
24
-
32
+
25
33
  // Initialize model dropdown (will be populated when models.js calls updateModelStatusIndicator)
26
34
  initializeModelDropdown();
27
-
35
+
28
36
  // Update attachment button state periodically
29
37
  updateAttachmentButtonState();
30
38
  setInterval(updateAttachmentButtonState, 1000);
39
+
40
+ // Display initial system message
41
+ displaySystemMessage();
31
42
  });
32
43
 
33
44
  function setupChatEventListeners() {
34
45
  // Send button click
35
46
  sendBtn.onclick = sendMessage;
36
-
47
+
37
48
  // Attachment button click
38
49
  attachmentBtn.onclick = () => {
39
50
  if (!currentLoadedModel) {
@@ -53,10 +64,10 @@ function setupChatEventListeners() {
53
64
  // Chat input events
54
65
  chatInput.addEventListener('keydown', handleChatInputKeydown);
55
66
  chatInput.addEventListener('paste', handleChatInputPaste);
56
-
67
+
57
68
  // Model select change
58
69
  modelSelect.addEventListener('change', handleModelSelectChange);
59
-
70
+
60
71
  // Send button click
61
72
  sendBtn.addEventListener('click', function() {
62
73
  // Check if we have a loaded model
@@ -72,10 +83,14 @@ function setupChatEventListeners() {
72
83
  // Initialize model dropdown with available models
73
84
  function initializeModelDropdown() {
74
85
  const allModels = window.SERVER_MODELS || {};
75
-
86
+
76
87
  // Clear existing options except the first one
77
- modelSelect.innerHTML = '<option value="">Pick a model</option>';
78
-
88
+ const indicator = document.getElementById('model-status-indicator');
89
+ if (indicator.classList.contains('offline') || modelSelect.value === 'server-offline') {
90
+ modelSelect.value = 'server-offline';
91
+ } else {
92
+ modelSelect.innerHTML = '<option value="">Click to select a model ▼</option>';
93
+ }
79
94
  // Add only installed models to dropdown
80
95
  Object.keys(allModels).forEach(modelId => {
81
96
  // Only add if the model is installed
@@ -86,7 +101,7 @@ function initializeModelDropdown() {
86
101
  modelSelect.appendChild(option);
87
102
  }
88
103
  });
89
-
104
+
90
105
  // Set current selection based on loaded model
91
106
  updateModelSelectValue();
92
107
  }
@@ -96,10 +111,15 @@ window.initializeModelDropdown = initializeModelDropdown;
96
111
 
97
112
  // Update model select value to match currently loaded model
98
113
  function updateModelSelectValue() {
99
- if (currentLoadedModel) {
114
+ const indicator = document.getElementById('model-status-indicator');
115
+ if (currentLoadedModel && indicator.classList.contains('loading')) {
116
+ modelSelect.value = 'loading-model';
117
+ } else if (currentLoadedModel) {
100
118
  modelSelect.value = currentLoadedModel;
119
+ } else if (indicator.classList.contains('offline') && modelSelect.value === 'server-offline') {
120
+ modelSelect.value = 'server-offline';
101
121
  } else {
102
- modelSelect.value = '';
122
+ return;
103
123
  }
104
124
  }
105
125
 
@@ -109,37 +129,34 @@ window.updateModelSelectValue = updateModelSelectValue;
109
129
  // Handle model selection change
110
130
  async function handleModelSelectChange() {
111
131
  const selectedModel = modelSelect.value;
112
-
113
- if (!selectedModel) {
114
- return; // "Pick a model" selected
115
- }
116
-
117
- if (selectedModel === currentLoadedModel) {
118
- return; // Same model already loaded
119
- }
120
-
132
+
133
+ if (!selectedModel) return; // "Click to select a model ▼" selected
134
+ if (selectedModel === currentLoadedModel) return; // Same model already loaded
135
+
121
136
  // Use the standardized load function
122
137
  await loadModelStandardized(selectedModel, {
123
138
  onLoadingStart: (modelId) => {
124
139
  // Update dropdown to show loading state with model name
125
- const loadingOption = modelSelect.querySelector('option[value=""]');
140
+ const loadingOption = document.createElement('option');
141
+ const select = document.getElementById('model-select');
142
+ select.innerHTML = '';
143
+
126
144
  if (loadingOption) {
145
+ loadingOption.value = 'loading-model';
127
146
  loadingOption.textContent = `Loading ${modelId}...`;
147
+ loadingOption.hidden = true;
148
+ select.appendChild(loadingOption);
128
149
  }
129
150
  },
130
151
  onLoadingEnd: (modelId, success) => {
131
152
  // Reset the default option text
132
153
  const defaultOption = modelSelect.querySelector('option[value=""]');
133
- if (defaultOption) {
134
- defaultOption.textContent = 'Pick a model';
135
- }
136
- },
137
- onSuccess: (loadedModelId) => {
138
- // Update attachment button state for new model
154
+ if (defaultOption) defaultOption.textContent = 'Click to select a model ▼';
155
+ },
156
+ onSuccess: () => {
139
157
  updateAttachmentButtonState();
140
158
  },
141
- onError: (error, failedModelId) => {
142
- // Reset dropdown to previous value on error
159
+ onError: () => {
143
160
  updateModelSelectValue();
144
161
  }
145
162
  });
@@ -149,7 +166,7 @@ async function handleModelSelectChange() {
149
166
  function updateAttachmentButtonState() {
150
167
  // Update model dropdown selection
151
168
  updateModelSelectValue();
152
-
169
+
153
170
  // Update send button state based on model loading
154
171
  if (modelSelect.disabled) {
155
172
  sendBtn.disabled = true;
@@ -163,25 +180,30 @@ function updateAttachmentButtonState() {
163
180
  attachmentBtn.style.opacity = '0.5';
164
181
  attachmentBtn.style.cursor = 'not-allowed';
165
182
  attachmentBtn.title = 'Load a model first';
166
- return;
167
- }
168
-
169
- const isVision = isVisionModel(currentLoadedModel);
170
-
171
- if (isVision) {
172
- attachmentBtn.style.opacity = '1';
173
- attachmentBtn.style.cursor = 'pointer';
174
- attachmentBtn.title = 'Attach images';
175
183
  } else {
176
- attachmentBtn.style.opacity = '0.5';
177
- attachmentBtn.style.cursor = 'not-allowed';
178
- attachmentBtn.title = 'Image attachments not supported by this model';
184
+ const isVision = isVisionModel(currentLoadedModel);
185
+
186
+ if (isVision) {
187
+ attachmentBtn.style.opacity = '1';
188
+ attachmentBtn.style.cursor = 'pointer';
189
+ attachmentBtn.title = 'Attach images';
190
+ } else {
191
+ attachmentBtn.style.opacity = '0.5';
192
+ attachmentBtn.style.cursor = 'not-allowed';
193
+ attachmentBtn.title = 'Image attachments not supported by this model';
194
+ }
179
195
  }
196
+
197
+ // Update system message when model state changes
198
+ displaySystemMessage();
180
199
  }
181
200
 
182
201
  // Make updateAttachmentButtonState accessible globally
183
202
  window.updateAttachmentButtonState = updateAttachmentButtonState;
184
203
 
204
+ // Make displaySystemMessage accessible globally
205
+ window.displaySystemMessage = displaySystemMessage;
206
+
185
207
  // Auto-load default model and send message
186
208
  async function autoLoadDefaultModelAndSend() {
187
209
  // Check if default model is available and installed
@@ -189,34 +211,27 @@ async function autoLoadDefaultModelAndSend() {
189
211
  showErrorBanner('No models available. Please install a model first.');
190
212
  return;
191
213
  }
192
-
214
+
193
215
  if (!window.installedModels || !window.installedModels.has(DEFAULT_MODEL)) {
194
216
  showErrorBanner('Default model is not installed. Please install it from the Model Management tab.');
195
217
  return;
196
218
  }
197
-
219
+
198
220
  // Store the message to send after loading
199
221
  const messageToSend = chatInput.value.trim();
200
- if (!messageToSend && attachedFiles.length === 0) {
201
- return; // Nothing to send
202
- }
203
-
222
+ if (!messageToSend && attachedFiles.length === 0) return;
223
+
204
224
  // Use the standardized load function
205
225
  const success = await loadModelStandardized(DEFAULT_MODEL, {
206
- onLoadingStart: (modelId) => {
207
- // Custom UI updates for auto-loading
208
- sendBtn.textContent = 'Loading model...';
209
- },
210
- onLoadingEnd: (modelId, loadSuccess) => {
211
- // Reset send button text
212
- sendBtn.textContent = 'Send';
213
- },
214
- onSuccess: (loadedModelId) => {
215
- // Send the message after successful load
216
- sendMessage(messageToSend);
217
- },
218
- onError: (error, failedModelId) => {
226
+ // Custom UI updates for auto-loading
227
+ onLoadingStart: () => { sendBtn.textContent = 'Loading model...'; },
228
+ // Reset send button text
229
+ onLoadingEnd: () => { sendBtn.textContent = 'Send'; },
230
+ // Send the message after successful load
231
+ onSuccess: () => { sendMessage(messageToSend); },
232
+ onError: (error) => {
219
233
  console.error('Error auto-loading default model:', error);
234
+ showErrorBanner('Failed to load model: ' + error.message);
220
235
  }
221
236
  });
222
237
  }
@@ -237,15 +252,15 @@ function handleFileSelection() {
237
252
  // Check if current model supports vision
238
253
  if (!currentLoadedModel) {
239
254
  alert('Please load a model first before attaching images.');
240
- fileAttachment.value = ''; // Clear the input
255
+ fileAttachment.value = '';
241
256
  return;
242
257
  }
243
258
  if (!isVisionModel(currentLoadedModel)) {
244
259
  alert(`The current model "${currentLoadedModel}" does not support image inputs. Please load a model with "Vision" capabilities.`);
245
- fileAttachment.value = ''; // Clear the input
260
+ fileAttachment.value = '';
246
261
  return;
247
262
  }
248
-
263
+
249
264
  // Filter only image files
250
265
  const imageFiles = Array.from(fileAttachment.files).filter(file => {
251
266
  if (!file.type.startsWith('image/')) {
@@ -254,17 +269,17 @@ function handleFileSelection() {
254
269
  }
255
270
  return true;
256
271
  });
257
-
272
+
258
273
  if (imageFiles.length === 0) {
259
274
  alert('Please select only image files (PNG, JPG, GIF, etc.)');
260
- fileAttachment.value = ''; // Clear the input
275
+ fileAttachment.value = '';
261
276
  return;
262
277
  }
263
-
278
+
264
279
  if (imageFiles.length !== fileAttachment.files.length) {
265
280
  alert(`${fileAttachment.files.length - imageFiles.length} non-image file(s) were skipped. Only image files are supported.`);
266
281
  }
267
-
282
+
268
283
  attachedFiles = imageFiles;
269
284
  updateInputPlaceholder();
270
285
  updateAttachmentPreviewVisibility();
@@ -277,7 +292,8 @@ function handleChatInputKeydown(e) {
277
292
  if (e.key === 'Escape' && attachedFiles.length > 0) {
278
293
  e.preventDefault();
279
294
  clearAttachments();
280
- } else if (e.key === 'Enter') {
295
+ } else if (e.key === 'Enter' && !e.shiftKey) {
296
+ e.preventDefault();
281
297
  // Check if we have a loaded model
282
298
  if (currentLoadedModel && modelSelect.value !== '' && !modelSelect.disabled) {
283
299
  sendMessage();
@@ -291,19 +307,19 @@ function handleChatInputKeydown(e) {
291
307
  // Handle paste events for images
292
308
  async function handleChatInputPaste(e) {
293
309
  e.preventDefault();
294
-
310
+
295
311
  const clipboardData = e.clipboardData || window.clipboardData;
296
312
  const items = clipboardData.items;
297
313
  let hasImage = false;
298
314
  let pastedText = '';
299
-
315
+
300
316
  // Check for text content first
301
317
  for (let item of items) {
302
318
  if (item.type === 'text/plain') {
303
319
  pastedText = clipboardData.getData('text/plain');
304
320
  }
305
321
  }
306
-
322
+
307
323
  // Check for images
308
324
  for (let item of items) {
309
325
  if (item.type.indexOf('image') !== -1) {
@@ -314,10 +330,7 @@ async function handleChatInputPaste(e) {
314
330
  const currentModel = modelSelect.value;
315
331
  if (!isVisionModel(currentModel)) {
316
332
  alert(`The selected model "${currentModel}" does not support image inputs. Please select a model with "Vision" capabilities to paste images.`);
317
- // Only paste text, skip the image
318
- if (pastedText) {
319
- chatInput.value = pastedText;
320
- }
333
+ if (pastedText) chatInput.value = pastedText;
321
334
  return;
322
335
  }
323
336
  // Add to attachedFiles array only if it's an image and model supports vision
@@ -327,12 +340,10 @@ async function handleChatInputPaste(e) {
327
340
  }
328
341
  }
329
342
  }
330
-
343
+
331
344
  // Update input box content - only show text, images will be indicated separately
332
- if (pastedText) {
333
- chatInput.value = pastedText;
334
- }
335
-
345
+ if (pastedText) chatInput.value = pastedText;
346
+
336
347
  // Update placeholder to show attached images
337
348
  updateInputPlaceholder();
338
349
  updateAttachmentPreviewVisibility();
@@ -358,46 +369,42 @@ function updateAttachmentPreviewVisibility() {
358
369
  function updateAttachmentPreviews() {
359
370
  // Clear existing previews
360
371
  attachmentsPreviewRow.innerHTML = '';
361
-
362
- if (attachedFiles.length === 0) {
363
- return;
364
- }
365
-
372
+
373
+ if (attachedFiles.length === 0) return;
374
+
366
375
  attachedFiles.forEach((file, index) => {
367
376
  // Skip non-image files (extra safety check)
368
377
  if (!file.type.startsWith('image/')) {
369
378
  console.warn(`Skipping non-image file in preview: ${file.name} (${file.type})`);
370
379
  return;
371
380
  }
372
-
381
+
373
382
  const previewDiv = document.createElement('div');
374
383
  previewDiv.className = 'attachment-preview';
375
-
384
+
376
385
  // Create thumbnail
377
386
  const thumbnail = document.createElement('img');
378
387
  thumbnail.className = 'attachment-thumbnail';
379
388
  thumbnail.alt = file.name;
380
-
389
+
381
390
  // Create filename display
382
391
  const filename = document.createElement('div');
383
392
  filename.className = 'attachment-filename';
384
393
  filename.textContent = file.name || `pasted-image-${index + 1}`;
385
394
  filename.title = file.name || `pasted-image-${index + 1}`;
386
-
395
+
387
396
  // Create remove button
388
397
  const removeBtn = document.createElement('button');
389
398
  removeBtn.className = 'attachment-remove-btn';
390
399
  removeBtn.innerHTML = '✕';
391
400
  removeBtn.title = 'Remove this image';
392
401
  removeBtn.onclick = () => removeAttachment(index);
393
-
402
+
394
403
  // Generate thumbnail for image
395
404
  const reader = new FileReader();
396
- reader.onload = (e) => {
397
- thumbnail.src = e.target.result;
398
- };
405
+ reader.onload = (e) => { thumbnail.src = e.target.result; };
399
406
  reader.readAsDataURL(file);
400
-
407
+
401
408
  previewDiv.appendChild(thumbnail);
402
409
  previewDiv.appendChild(filename);
403
410
  previewDiv.appendChild(removeBtn);
@@ -426,149 +433,385 @@ function fileToBase64(file) {
426
433
  return new Promise((resolve, reject) => {
427
434
  const reader = new FileReader();
428
435
  reader.readAsDataURL(file);
429
- reader.onload = () => resolve(reader.result.split(',')[1]); // Remove data:image/...;base64, prefix
436
+ reader.onload = () => resolve(reader.result.split(',')[1]);
430
437
  reader.onerror = error => reject(error);
431
438
  });
432
439
  }
433
440
 
441
+ /**
442
+ * Incrementally (re)renders reasoning + answer without blowing away the header so user
443
+ * collapsing/expanding persists while tokens stream.
444
+ */
445
+ function updateMessageContent(bubbleElement, text, isMarkdown = false) {
446
+ if (!isMarkdown) {
447
+ bubbleElement.textContent = text;
448
+ return;
449
+ }
450
+
451
+ const { main, thought, isThinking } = parseReasoningBlocks(text);
452
+
453
+ // Pure normal markdown (no reasoning)
454
+ if (!thought.trim()) {
455
+ // If structure existed before, replace fully (safe—no toggle needed)
456
+ bubbleElement.innerHTML = renderMarkdown(main);
457
+ delete bubbleElement.dataset.thinkExpanded;
458
+ return;
459
+ }
460
+
461
+ // Determine current expanded state (user preference) or default
462
+ let expanded;
463
+ if (bubbleElement.dataset.thinkExpanded === 'true') expanded = true;
464
+ else if (bubbleElement.dataset.thinkExpanded === 'false') expanded = false;
465
+ else expanded = !!isThinking; // default: open while still streaming until user intervenes
466
+
467
+ // Create structure once
468
+ let container = bubbleElement.querySelector('.think-tokens-container');
469
+ let thoughtContent, headerChevron, headerLabel, mainDiv;
470
+
471
+ if (!container) {
472
+ bubbleElement.innerHTML = ''; // first time constructing reasoning UI
473
+
474
+ container = document.createElement('div');
475
+ container.className = 'think-tokens-container' + (expanded ? '' : ' collapsed');
476
+
477
+ const header = document.createElement('div');
478
+ header.className = 'think-tokens-header';
479
+ header.onclick = function () { toggleThinkTokens(header); };
480
+
481
+ headerChevron = document.createElement('span');
482
+ headerChevron.className = 'think-tokens-chevron';
483
+ headerChevron.textContent = expanded ? '▼' : '▶';
484
+
485
+ headerLabel = document.createElement('span');
486
+ headerLabel.className = 'think-tokens-label';
487
+ header.appendChild(headerChevron);
488
+ header.appendChild(headerLabel);
489
+
490
+ thoughtContent = document.createElement('div');
491
+ thoughtContent.className = 'think-tokens-content';
492
+ thoughtContent.style.display = expanded ? 'block' : 'none';
493
+
494
+ container.appendChild(header);
495
+ container.appendChild(thoughtContent);
496
+ bubbleElement.appendChild(container);
497
+
498
+ if (main.trim()) {
499
+ mainDiv = document.createElement('div');
500
+ mainDiv.className = 'main-response';
501
+ bubbleElement.appendChild(mainDiv);
502
+ }
503
+ } else {
504
+ thoughtContent = container.querySelector('.think-tokens-content');
505
+ headerChevron = container.querySelector('.think-tokens-chevron');
506
+ headerLabel = container.querySelector('.think-tokens-label');
507
+ mainDiv = bubbleElement.querySelector('.main-response');
508
+ }
509
+
510
+ // Update label & chevron (don’t override user-expanded state)
511
+ headerChevron.textContent = expanded ? '▼' : '▶';
512
+ // Animation-aware label handling
513
+ if (isThinking) {
514
+ // If not already animating, seed an initial frame then start animation
515
+ if (bubbleElement.dataset.thinkAnimActive !== '1') {
516
+ headerLabel.textContent = THINKING_FRAMES[0];
517
+ startThinkingAnimation(container);
518
+ }
519
+ } else {
520
+ // Stop any animation and set final label
521
+ if (bubbleElement.dataset.thinkAnimActive === '1') {
522
+ stopThinkingAnimation(container);
523
+ } else {
524
+ headerLabel.textContent = 'Thought Process';
525
+ }
526
+ }
527
+
528
+ // Update reasoning content (can re-run markdown safely)
529
+ thoughtContent.innerHTML = renderMarkdown(thought);
530
+
531
+ // Update main answer section
532
+ if (main.trim()) {
533
+ if (!mainDiv) {
534
+ mainDiv = document.createElement('div');
535
+ mainDiv.className = 'main-response';
536
+ bubbleElement.appendChild(mainDiv);
537
+ }
538
+ mainDiv.innerHTML = renderMarkdown(main);
539
+ } else if (mainDiv) {
540
+ mainDiv.remove();
541
+ }
542
+
543
+ // Persist preference
544
+ bubbleElement.dataset.thinkExpanded = expanded ? 'true' : 'false';
545
+ }
546
+
434
547
  function appendMessage(role, text, isMarkdown = false) {
435
548
  const div = document.createElement('div');
436
549
  div.className = 'chat-message ' + role;
437
550
  // Add a bubble for iMessage style
438
551
  const bubble = document.createElement('div');
439
552
  bubble.className = 'chat-bubble ' + role;
440
-
441
- if (role === 'llm' && isMarkdown) {
442
- bubble.innerHTML = renderMarkdownWithThinkTokens(text);
553
+
554
+ // Check if isMarkdown is true, regardless of role
555
+ if (isMarkdown) {
556
+ // Build structure via incremental updater (ensures later token updates won’t wipe user toggle)
557
+ updateMessageContent(bubble, text, true);
443
558
  } else {
444
559
  bubble.textContent = text;
445
560
  }
446
-
561
+
447
562
  div.appendChild(bubble);
448
563
  chatHistory.appendChild(div);
449
564
  chatHistory.scrollTop = chatHistory.scrollHeight;
450
- return bubble; // Return the bubble element for streaming updates
565
+ return bubble;
451
566
  }
452
567
 
453
- function updateMessageContent(bubbleElement, text, isMarkdown = false) {
454
- if (isMarkdown) {
455
- bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
568
+ // Display system message based on current state
569
+ function displaySystemMessage() {
570
+ // Remove existing system message if it exists
571
+ if (systemMessageElement) {
572
+ systemMessageElement.remove();
573
+ systemMessageElement = null;
574
+ }
575
+
576
+ // Don't show system message if there are already user/LLM messages
577
+ if (messages.length > 0) return;
578
+
579
+ let messageText = '';
580
+
581
+ // Check if any models are installed
582
+ const hasInstalledModels = window.installedModels && window.installedModels.size > 0;
583
+
584
+ if (!hasInstalledModels) {
585
+ // No models installed - show first message
586
+ messageText = `Welcome to Lemonade! To get started:
587
+ 1. Head over to the Model Management tab.
588
+ 2. Use the 📥Download button to download a model.
589
+ 3. Use the 🚀Load button to load the model.
590
+ 4. Come back to this tab, and you are ready to chat with the model.`;
591
+ } else if (!currentLoadedModel) {
592
+ // Models available but none loaded - show second message
593
+ messageText = 'Welcome to Lemonade! Choose a model from the dropdown menu below to load it and start chatting.';
594
+ }
595
+
596
+ if (messageText) {
597
+ const div = document.createElement('div');
598
+ div.className = 'chat-message system';
599
+ div.setAttribute('data-system-message', 'true');
600
+
601
+ const bubble = document.createElement('div');
602
+ bubble.className = 'chat-bubble system';
603
+ bubble.textContent = messageText;
604
+
605
+ div.appendChild(bubble);
606
+ chatHistory.appendChild(div);
607
+ chatHistory.scrollTop = chatHistory.scrollHeight;
608
+
609
+ systemMessageElement = div;
610
+ }
611
+ }
612
+
613
+ function toggleThinkTokens(header) {
614
+ const container = header.parentElement;
615
+ const content = container.querySelector('.think-tokens-content');
616
+ const chevron = header.querySelector('.think-tokens-chevron');
617
+ const bubble = header.closest('.chat-bubble');
618
+
619
+ const nowCollapsed = !container.classList.contains('collapsed'); // current (before toggle) expanded?
620
+ if (nowCollapsed) {
621
+ // Collapse
622
+ content.style.display = 'none';
623
+ chevron.textContent = '▶';
624
+ container.classList.add('collapsed');
625
+ if (bubble) bubble.dataset.thinkExpanded = 'false';
456
626
  } else {
457
- bubbleElement.textContent = text;
627
+ // Expand
628
+ content.style.display = 'block';
629
+ chevron.textContent = '▼';
630
+ container.classList.remove('collapsed');
631
+ if (bubble) bubble.dataset.thinkExpanded = 'true';
458
632
  }
459
633
  }
460
634
 
461
- function renderMarkdownWithThinkTokens(text) {
462
- // Check if text contains opening think tag
463
- if (text.includes('<think>')) {
464
- if (text.includes('</think>')) {
465
- // Complete think block - handle as before
466
- const thinkMatch = text.match(/<think>(.*?)<\/think>/s);
467
- if (thinkMatch) {
468
- const thinkContent = thinkMatch[1].trim();
469
- const mainResponse = text.replace(/<think>.*?<\/think>/s, '').trim();
470
-
471
- // Create collapsible structure
472
- let html = '';
473
- if (thinkContent) {
474
- html += `
475
- <div class="think-tokens-container">
476
- <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
477
- <span class="think-tokens-chevron">▼</span>
478
- <span class="think-tokens-label">Thinking...</span>
479
- </div>
480
- <div class="think-tokens-content">
481
- ${renderMarkdown(thinkContent)}
482
- </div>
483
- </div>
484
- `;
485
- }
486
- if (mainResponse) {
487
- html += `<div class="main-response">${renderMarkdown(mainResponse)}</div>`;
488
- }
489
- return html;
635
+ // ---------- Reasoning Parsing (Harmony + <think>) ----------
636
+
637
+ function parseReasoningBlocks(raw) {
638
+ if (raw == null) return { main: '', thought: '', isThinking: false };
639
+ // Added additional Harmony variants: <|channel|>analysis<|channel|>, <|channel|>analysis<|message|>, <|channel|>analysis<|assistant|>
640
+ const RE_OPEN = /<think>|<\|channel\|>analysis<\|(channel|message|assistant)\|>/;
641
+ const RE_CLOSE = /<\/think>|<\|end\|>/;
642
+
643
+ let remaining = String(raw);
644
+ let main = '';
645
+ let thought = '';
646
+ let isThinking = false;
647
+
648
+ while (true) {
649
+ const openIdx = remaining.search(RE_OPEN);
650
+ if (openIdx === -1) {
651
+ if (isThinking) {
652
+ thought += remaining;
653
+ } else {
654
+ main += remaining;
490
655
  }
656
+ break;
657
+ }
658
+
659
+ // Text before the opener
660
+ if (isThinking) {
661
+ thought += remaining.slice(0, openIdx);
491
662
  } else {
492
- // Partial think block - only opening tag found, still being generated
493
- const thinkMatch = text.match(/<think>(.*)/s);
494
- if (thinkMatch) {
495
- const thinkContent = thinkMatch[1];
496
- const beforeThink = text.substring(0, text.indexOf('<think>'));
497
-
498
- let html = '';
499
- if (beforeThink.trim()) {
500
- html += `<div class="main-response">${renderMarkdown(beforeThink)}</div>`;
501
- }
502
-
503
- html += `
504
- <div class="think-tokens-container">
505
- <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
506
- <span class="think-tokens-chevron">▼</span>
507
- <span class="think-tokens-label">Thinking...</span>
508
- </div>
509
- <div class="think-tokens-content">
510
- ${renderMarkdown(thinkContent)}
511
- </div>
512
- </div>
513
- `;
514
-
515
- return html;
516
- }
663
+ main += remaining.slice(0, openIdx);
517
664
  }
665
+
666
+ // Drop the opener
667
+ remaining = remaining.slice(openIdx).replace(RE_OPEN, '');
668
+ isThinking = true;
669
+
670
+ const closeIdx = remaining.search(RE_CLOSE);
671
+ if (closeIdx === -1) {
672
+ // Still streaming reasoning (no closer yet)
673
+ thought += remaining;
674
+ break;
675
+ }
676
+
677
+ // Add reasoning segment up to closer
678
+ thought += remaining.slice(0, closeIdx);
679
+ remaining = remaining.slice(closeIdx).replace(RE_CLOSE, '');
680
+ isThinking = false;
681
+ // Loop to look for additional reasoning blocks
518
682
  }
519
-
520
- // Fallback to normal markdown rendering
521
- return renderMarkdown(text);
683
+ return { main, thought, isThinking };
684
+ }
685
+
686
+ function renderMarkdownWithThinkTokens(text, preservedExpanded) {
687
+ const { main, thought, isThinking } = parseReasoningBlocks(text);
688
+
689
+ if (!thought.trim()) {
690
+ return renderMarkdown(main);
691
+ }
692
+
693
+ // If we have a preserved user preference, honor it. Otherwise default:
694
+ // open while streaming (original behavior) else collapsed = false.
695
+ let expanded = (typeof preservedExpanded === 'boolean')
696
+ ? preservedExpanded
697
+ : !!isThinking;
698
+
699
+ const chevron = expanded ? '▼' : '▶';
700
+ const label = expanded && isThinking ? 'Thinking...' : (expanded ? 'Thought Process' : 'Thought Process');
701
+
702
+ let html = `
703
+ <div class="think-tokens-container${expanded ? '' : ' collapsed'}">
704
+ <div class="think-tokens-header" onclick="toggleThinkTokens(this)">
705
+ <span class="think-tokens-chevron">${chevron}</span>
706
+ <span class="think-tokens-label">${label}</span>
707
+ </div>
708
+ <div class="think-tokens-content" style="display:${expanded ? 'block' : 'none'};">
709
+ ${renderMarkdown(thought)}
710
+ </div>
711
+ </div>
712
+ `;
713
+ if (main.trim()) {
714
+ html += `<div class="main-response">${renderMarkdown(main)}</div>`;
715
+ }
716
+ return html;
717
+ }
718
+
719
+ function extractAssistantReasoning(fullText) {
720
+ const { main, thought } = parseReasoningBlocks(fullText);
721
+ const result = { content: (main || '').trim(), raw: fullText };
722
+ if (thought && thought.trim()) result.reasoning_content = thought.trim();
723
+ return result;
522
724
  }
523
725
 
726
+ // -----------------------------------------------------------
727
+
524
728
  function toggleThinkTokens(header) {
525
729
  const container = header.parentElement;
526
730
  const content = container.querySelector('.think-tokens-content');
527
731
  const chevron = header.querySelector('.think-tokens-chevron');
528
-
529
- if (content.style.display === 'none') {
530
- content.style.display = 'block';
531
- chevron.textContent = '▼';
532
- container.classList.remove('collapsed');
533
- } else {
732
+ const bubble = header.closest('.chat-bubble');
733
+
734
+ const nowCollapsed = !container.classList.contains('collapsed'); // current (before toggle) expanded?
735
+ if (nowCollapsed) {
736
+ // Collapse
534
737
  content.style.display = 'none';
535
738
  chevron.textContent = '▶';
536
739
  container.classList.add('collapsed');
740
+ if (bubble) bubble.dataset.thinkExpanded = 'false';
741
+ } else {
742
+ // Expand
743
+ content.style.display = 'block';
744
+ chevron.textContent = '▼';
745
+ container.classList.remove('collapsed');
746
+ if (bubble) bubble.dataset.thinkExpanded = 'true';
747
+ }
748
+ }
749
+
750
+ function startThinkingAnimation(container) {
751
+ const bubble = container.closest('.chat-bubble');
752
+ if (!bubble || bubble.dataset.thinkAnimActive === '1') return;
753
+ const labelEl = container.querySelector('.think-tokens-label');
754
+ if (!labelEl) return;
755
+ bubble.dataset.thinkAnimActive = '1';
756
+ let i = 0;
757
+ const update = () => {
758
+ // If streaming ended mid-cycle, stop.
759
+ if (bubble.dataset.thinkAnimActive !== '1') return;
760
+ labelEl.textContent = THINKING_FRAMES[i % THINKING_FRAMES.length];
761
+ i++;
762
+ bubble.dataset.thinkAnimId = String(setTimeout(update, THINKING_ANIM_INTERVAL_MS));
763
+ };
764
+ update();
765
+ }
766
+
767
+ function stopThinkingAnimation(container, finalLabel = 'Thought Process') {
768
+ const bubble = container.closest('.chat-bubble');
769
+ if (!bubble) return;
770
+ bubble.dataset.thinkAnimActive = '0';
771
+ const id = bubble.dataset.thinkAnimId;
772
+ if (id) {
773
+ clearTimeout(Number(id));
774
+ delete bubble.dataset.thinkAnimId;
537
775
  }
776
+ const labelEl = container.querySelector('.think-tokens-label');
777
+ if (labelEl) labelEl.textContent = finalLabel;
538
778
  }
539
779
 
540
- async function sendMessage() {
541
- const text = chatInput.value.trim();
780
+ async function sendMessage(existingTextIfAny) {
781
+ const text = (existingTextIfAny !== undefined ? existingTextIfAny : chatInput.value.trim());
542
782
  if (!text && attachedFiles.length === 0) return;
543
-
783
+
784
+ // Remove system message when user starts chatting
785
+ if (systemMessageElement) {
786
+ systemMessageElement.remove();
787
+ systemMessageElement = null;
788
+ }
789
+
544
790
  // Check if a model is loaded, if not, automatically load the default model
545
791
  if (!currentLoadedModel) {
546
792
  const allModels = window.SERVER_MODELS || {};
547
-
793
+
548
794
  if (allModels[DEFAULT_MODEL]) {
549
795
  try {
550
796
  // Show loading message
551
797
  const loadingBubble = appendMessage('system', 'Loading default model, please wait...');
552
-
798
+
553
799
  // Load the default model
554
800
  await httpRequest(getServerBaseUrl() + '/api/v1/load', {
555
801
  method: 'POST',
556
802
  headers: { 'Content-Type': 'application/json' },
557
803
  body: JSON.stringify({ model_name: DEFAULT_MODEL })
558
804
  });
559
-
805
+
560
806
  // Update model status
561
807
  await updateModelStatusIndicator();
562
-
808
+
563
809
  // Remove loading message
564
810
  loadingBubble.parentElement.remove();
565
-
811
+
566
812
  // Show success message briefly
567
813
  const successBubble = appendMessage('system', `Loaded ${DEFAULT_MODEL} successfully!`);
568
- setTimeout(() => {
569
- successBubble.parentElement.remove();
570
- }, 2000);
571
-
814
+ setTimeout(() => { successBubble.parentElement.remove(); }, 2000);
572
815
  } catch (error) {
573
816
  alert('Please load a model first before sending messages.');
574
817
  return;
@@ -578,26 +821,21 @@ async function sendMessage() {
578
821
  return;
579
822
  }
580
823
  }
581
-
824
+
582
825
  // Check if trying to send images to non-vision model
583
- if (attachedFiles.length > 0) {
584
- if (!isVisionModel(currentLoadedModel)) {
585
- alert(`Cannot send images to model "${currentLoadedModel}" as it does not support vision. Please load a model with "Vision" capabilities or remove the attached images.`);
586
- return;
587
- }
826
+ if (attachedFiles.length > 0 && !isVisionModel(currentLoadedModel)) {
827
+ alert(`Cannot send images to model "${currentLoadedModel}" as it does not support vision. Please load a model with "Vision" capabilities or remove the attached images.`);
828
+ return;
588
829
  }
589
-
830
+
590
831
  // Create message content
591
832
  let messageContent = [];
592
-
833
+
593
834
  // Add text if present
594
835
  if (text) {
595
- messageContent.push({
596
- type: "text",
597
- text: text
598
- });
836
+ messageContent.push({ type: "text", text: text });
599
837
  }
600
-
838
+
601
839
  // Add images if present
602
840
  if (attachedFiles.length > 0) {
603
841
  for (const file of attachedFiles) {
@@ -606,9 +844,7 @@ async function sendMessage() {
606
844
  const base64 = await fileToBase64(file);
607
845
  messageContent.push({
608
846
  type: "image_url",
609
- image_url: {
610
- url: `data:${file.type};base64,${base64}`
611
- }
847
+ image_url: { url: `data:${file.type};base64,${base64}` }
612
848
  });
613
849
  } catch (error) {
614
850
  console.error('Error converting image to base64:', error);
@@ -616,25 +852,25 @@ async function sendMessage() {
616
852
  }
617
853
  }
618
854
  }
619
-
855
+
620
856
  // Display user message (show text and file names)
621
857
  let displayText = text;
622
858
  if (attachedFiles.length > 0) {
623
859
  const fileNames = attachedFiles.map(f => f.name || 'pasted-image').join(', ');
624
860
  displayText = displayText ? `${displayText}\n[Images: ${fileNames}]` : `[Images: ${fileNames}]`;
625
861
  }
626
-
627
- appendMessage('user', displayText);
628
-
862
+
863
+ appendMessage('user', displayText, true);
864
+
629
865
  // Add to messages array
630
866
  const userMessage = {
631
867
  role: 'user',
632
- content: messageContent.length === 1 && messageContent[0].type === "text"
633
- ? messageContent[0].text
868
+ content: messageContent.length === 1 && messageContent[0].type === "text"
869
+ ? messageContent[0].text
634
870
  : messageContent
635
871
  };
636
872
  messages.push(userMessage);
637
-
873
+
638
874
  // Clear input and attachments
639
875
  chatInput.value = '';
640
876
  attachedFiles = [];
@@ -643,7 +879,7 @@ async function sendMessage() {
643
879
  updateAttachmentPreviewVisibility(); // Hide preview container
644
880
  updateAttachmentPreviews(); // Clear previews
645
881
  sendBtn.disabled = true;
646
-
882
+
647
883
  // Streaming OpenAI completions (placeholder, adapt as needed)
648
884
  let llmText = '';
649
885
  const llmBubble = appendMessage('llm', '...');
@@ -651,14 +887,14 @@ async function sendMessage() {
651
887
  // Use the correct endpoint for chat completions with model settings
652
888
  const modelSettings = getCurrentModelSettings ? getCurrentModelSettings() : {};
653
889
  console.log('Applying model settings to API request:', modelSettings);
654
-
890
+
655
891
  const payload = {
656
892
  model: currentLoadedModel,
657
893
  messages: messages,
658
894
  stream: true,
659
895
  ...modelSettings // Apply current model settings
660
896
  };
661
-
897
+
662
898
  const resp = await httpRequest(getServerBaseUrl() + '/api/v1/chat/completions', {
663
899
  method: 'POST',
664
900
  headers: { 'Content-Type': 'application/json' },
@@ -668,59 +904,124 @@ async function sendMessage() {
668
904
  const reader = resp.body.getReader();
669
905
  let decoder = new TextDecoder();
670
906
  llmBubble.textContent = '';
907
+
908
+ const reasoningEnabled = (() => {
909
+ try {
910
+ const meta = window.SERVER_MODELS?.[currentLoadedModel];
911
+ return Array.isArray(meta?.labels) && meta.labels.includes('reasoning');
912
+ } catch (_) { return false; }
913
+ })();
914
+
915
+ let thinkOpened = false;
916
+ let thinkClosed = false;
917
+ let reasoningSchemaActive = false; // true if we saw delta.reasoning object
918
+ let receivedAnyReasoning = false; // true once any reasoning (schema or reasoning_content) arrived
919
+
671
920
  while (true) {
672
921
  const { done, value } = await reader.read();
673
922
  if (done) break;
674
923
  const chunk = decoder.decode(value);
675
- if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
676
-
924
+ if (!chunk.trim()) continue;
925
+
677
926
  // Handle Server-Sent Events format
678
927
  const lines = chunk.split('\n');
679
- for (const line of lines) {
680
- if (line.startsWith('data: ')) {
681
- const jsonStr = line.substring(6).trim();
682
- if (jsonStr === '[DONE]') continue;
683
-
684
- try {
685
- const delta = JSON.parse(jsonStr);
686
- if (delta.choices && delta.choices[0] && delta.choices[0].delta) {
687
- const content = delta.choices[0].delta.content;
688
- if (content) {
689
- llmText += unescapeJsonString(content);
690
- updateMessageContent(llmBubble, llmText, true);
691
- chatHistory.scrollTop = chatHistory.scrollHeight;
928
+ for (const rawLine of lines) {
929
+ if (!rawLine.startsWith('data: ')) continue;
930
+ const jsonStr = rawLine.slice(6).trim();
931
+ if (!jsonStr || jsonStr === '[DONE]') continue;
932
+
933
+ let deltaObj;
934
+ try { deltaObj = JSON.parse(jsonStr); } catch { continue; }
935
+ const choiceDelta = deltaObj?.choices?.[0]?.delta;
936
+ if (!choiceDelta) continue;
937
+
938
+ // 1. OpenAI reasoning object (preferred schema)
939
+ if (choiceDelta.reasoning && !thinkClosed) {
940
+ reasoningSchemaActive = true;
941
+ const r = choiceDelta.reasoning;
942
+ if (!thinkOpened) {
943
+ llmText += '<think>';
944
+ thinkOpened = true;
945
+ }
946
+ if (Array.isArray(r.content)) {
947
+ for (const seg of r.content) {
948
+ if (seg?.type === 'output_text' && seg.text) {
949
+ llmText += unescapeJsonString(seg.text);
950
+ receivedAnyReasoning = true;
692
951
  }
693
952
  }
694
- } catch (parseErr) {
695
- console.warn('Failed to parse JSON:', jsonStr, parseErr);
953
+ }
954
+ if (r.done && !thinkClosed) {
955
+ llmText += '</think>';
956
+ thinkClosed = true;
696
957
  }
697
958
  }
959
+
960
+ // 2. llama.cpp style: reasoning_content (string fragments)
961
+ if (choiceDelta.reasoning_content && !thinkClosed) {
962
+ if (!thinkOpened) {
963
+ llmText += '<think>';
964
+ thinkOpened = true;
965
+ }
966
+ llmText += unescapeJsonString(choiceDelta.reasoning_content);
967
+ receivedAnyReasoning = true;
968
+ // We DO NOT close yet; we’ll close when first normal content arrives.
969
+ }
970
+
971
+ // 3. Plain content tokens
972
+ if (choiceDelta.content) {
973
+ let c = unescapeJsonString(choiceDelta.content);
974
+
975
+ // If we are inside reasoning (opened, not closed) and this is the first visible answer token,
976
+ // close the reasoning block before appending (unless model already emitted </think> itself).
977
+ if (thinkOpened && !thinkClosed) {
978
+ if (c.startsWith('</think>')) {
979
+ // Model closed it explicitly; strip that tag and mark closed
980
+ c = c.replace(/^<\/think>\s*/, '');
981
+ thinkClosed = true;
982
+ } else {
983
+ // Close ourselves (covers reasoning_content path & schema early content anomaly)
984
+ if (receivedAnyReasoning || reasoningEnabled) {
985
+ llmText += '</think>';
986
+ thinkClosed = true;
987
+ }
988
+ }
989
+ }
990
+
991
+ // If content stream itself begins a new reasoning section (rare), handle gracefully
992
+ if (!thinkOpened && /<think>/.test(c)) {
993
+ thinkOpened = true;
994
+ const parts = c.split(/<think>/);
995
+ // parts[0] is anything before accidental <think>, treat as normal visible content
996
+ llmText += parts[0];
997
+ // Everything after opener treated as reasoning until a closing tag or we decide to close
998
+ llmText += '<think>' + parts.slice(1).join('<think>');
999
+ receivedAnyReasoning = true;
1000
+ updateMessageContent(llmBubble, llmText, true);
1001
+ chatHistory.scrollTop = chatHistory.scrollHeight;
1002
+ continue;
1003
+ }
1004
+
1005
+ llmText += c;
1006
+ }
1007
+
1008
+ updateMessageContent(llmBubble, llmText, true);
1009
+ chatHistory.scrollTop = chatHistory.scrollHeight;
698
1010
  }
699
1011
  }
700
- if (!llmText) throw new Error('No response');
701
-
702
- // Split assistant response into content and reasoning_content so llama.cpp's Jinja does not need to parse <think> tags
703
- function splitAssistantResponse(text) {
704
- const THINK_OPEN = '<think>';
705
- const THINK_CLOSE = '</think>';
706
- const result = { content: text };
707
- const start = text.indexOf(THINK_OPEN);
708
- const end = text.indexOf(THINK_CLOSE);
709
- if (start !== -1 && end !== -1 && end > start) {
710
- const reasoning = text.substring(start + THINK_OPEN.length, end).trim();
711
- const visible = (text.substring(0, start) + text.substring(end + THINK_CLOSE.length)).trim();
712
- if (reasoning) result.reasoning_content = reasoning;
713
- result.content = visible;
714
- }
715
- return result;
1012
+
1013
+ // Final safety close (e.g., model stopped mid-reasoning)
1014
+ if (thinkOpened && !thinkClosed) {
1015
+ llmText += '</think>';
716
1016
  }
717
1017
 
718
- const assistantMsg = splitAssistantResponse(llmText);
1018
+ const assistantMsg = extractAssistantReasoning(llmText);
719
1019
  messages.push({ role: 'assistant', ...assistantMsg });
1020
+
720
1021
  } catch (e) {
721
1022
  let detail = e.message;
722
1023
  try {
723
- const errPayload = { ...payload, stream: false };
1024
+ const errPayload = { model: currentLoadedModel, messages: messages, stream: false };
724
1025
  const errResp = await httpJson(getServerBaseUrl() + '/api/v1/chat/completions', {
725
1026
  method: 'POST',
726
1027
  headers: { 'Content-Type': 'application/json' },
@@ -732,4 +1033,6 @@ async function sendMessage() {
732
1033
  showErrorBanner(`Chat error: ${detail}`);
733
1034
  }
734
1035
  sendBtn.disabled = false;
735
- }
1036
+ // Force a final render to trigger stop animation if needed
1037
+ updateMessageContent(llmBubble, llmText, true);
1038
+ }