lemonade-sdk 8.1.6__py3-none-any.whl → 8.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


lemonade/cli.py CHANGED
@@ -25,8 +25,6 @@ from lemonade.tools.humaneval import AccuracyHumaneval
  from lemonade.tools.perplexity import AccuracyPerplexity
  from lemonade.tools.accuracy import LMEvalHarness
  from lemonade.tools.prompt import LLMPrompt
- from lemonade.tools.quark.quark_load import QuarkLoad
- from lemonade.tools.quark.quark_quantize import QuarkQuantize
  from lemonade.tools.report.llm_report import LemonadeReport


@@ -45,8 +43,6 @@ def main():
  HuggingfaceBench,
  OgaLoad,
  OgaBench,
- QuarkQuantize,
- QuarkLoad,
  LemonadeReport,
  # Inherited from lemonade
  Cache,
@@ -591,7 +591,7 @@ class LemonadePerfTable(Table):
  _wrap("Total Generated Tokens", 9),
  Keys.RESPONSE_TOKENS,
  "d",
- stat_fn=sum,
+ stat_fn=lambda x: sum(_to_list(x)),
  ),
  SimpleStat(
  _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
@@ -6,6 +6,7 @@ import threading
  import platform

  from dotenv import load_dotenv
+ from fastapi import HTTPException, status

  from lemonade_server.pydantic_models import (
  PullConfig,
@@ -28,6 +29,20 @@ class LlamaTelemetry(WrappedServerTelemetry):
  Parse telemetry data from llama server output lines.
  """

+ if "vk::PhysicalDevice::createDevice: ErrorExtensionNotPresent" in line:
+ msg = (
+ "Your AMD GPU driver version is not compatible with this software.\n"
+ "Please update and try again: "
+ "https://www.amd.com/en/support/download/drivers.html"
+ )
+ logging.error(msg)
+ raise HTTPException(
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+ detail=msg,
+ )
+ elif "error" in line.lower():
+ logging.error(line)
+
  # Parse Vulkan device detection
  vulkan_match = re.search(r"ggml_vulkan: Found (\d+) Vulkan devices?:", line)
  if vulkan_match:
@@ -182,7 +197,7 @@ class LlamaServer(WrappedServer):
  exe_dir = os.path.dirname(exe_path)
  env_file_path = os.path.join(exe_dir, ".env")
  if os.path.exists(env_file_path):
- load_dotenv(env_file_path, override=True)
+ load_dotenv(env_file_path, override=False)
  env.update(os.environ)
  logging.debug(f"Loaded environment variables from {env_file_path}")

@@ -133,6 +133,21 @@ class StopOnEvent:
  return self.stop_event.is_set()


+ class NoCacheStaticFiles(StaticFiles):
+ """Custom StaticFiles class with no-cache headers"""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ def file_response(self, *args, **kwargs) -> Response:
+ response = super().file_response(*args, **kwargs)
+ # Add no-cache headers for all static files
+ response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+ response.headers["Pragma"] = "no-cache"
+ response.headers["Expires"] = "0"
+ return response
+
+
  class Server:
  """
  Open a web server that apps can use to communicate with the LLM.
@@ -198,7 +213,7 @@ class Server:
  # as the Web App
  static_dir = Path(__file__).parent / "static"
  self.app.mount(
- "/static", StaticFiles(directory=static_dir), name="static_assets"
+ "/static", NoCacheStaticFiles(directory=static_dir), name="static_assets"
  )

  # Performance stats that are set during /ws and can be
@@ -1145,18 +1160,33 @@ class Server:
  )
  self.input_tokens = len(input_ids[0])

- # For non-llamacpp recipes, truncate inputs to ctx_size if needed
- if self.llm_loaded.recipe != "llamacpp" and self.input_tokens > self.ctx_size:
- # Truncate input ids
- truncate_amount = self.input_tokens - self.ctx_size
- input_ids = input_ids[: self.ctx_size]
+ max_prompt_length = self.ctx_size  # Default fallback
+ # For OGA models, try to read the actual max prompt length from config
+ if "oga-" in self.llm_loaded.recipe:
+ try:
+ if model.config and model.config.get("max_prompt_length"):
+ max_prompt_length = model.config["max_prompt_length"]
+ logging.debug(
+ f"Using OGA model max_prompt_length: {max_prompt_length}"
+ )
+ # pylint: disable=broad-exception-caught
+ except Exception as e:
+ logging.debug(f"Could not read OGA model config, using ctx_size: {e}")

+ # Apply truncation if input exceeds the limit
+ if self.input_tokens > max_prompt_length:
+ # Truncate input ids
+ truncate_amount = self.input_tokens - max_prompt_length
+ input_ids = input_ids[:max_prompt_length]
  # Update token count
- self.input_tokens = len(input_ids)
+ if "oga-" in self.llm_loaded.recipe:
+ self.input_tokens = len(input_ids)
+ else:
+ self.input_tokens = len(input_ids[0])

- # Show warning message
+ # Log warning message instead of raising exception
  truncation_message = (
- f"Input exceeded {self.ctx_size} tokens. "
+ f"Input exceeded {max_prompt_length} tokens. "
  f"Truncated {truncate_amount} tokens from the beginning."
  )
  logging.warning(truncation_message)
@@ -1,6 +1,7 @@
  // Chat logic and functionality
  let messages = [];
  let attachedFiles = [];
+ let systemMessageElement = null;

  // Default model configuration
  const DEFAULT_MODEL = 'Qwen2.5-0.5B-Instruct-CPU';
@@ -28,6 +29,9 @@ document.addEventListener('DOMContentLoaded', function() {
  // Update attachment button state periodically
  updateAttachmentButtonState();
  setInterval(updateAttachmentButtonState, 1000);
+
+ // Display initial system message
+ displaySystemMessage();
  });

  function setupChatEventListeners() {
@@ -163,25 +167,30 @@ function updateAttachmentButtonState() {
  attachmentBtn.style.opacity = '0.5';
  attachmentBtn.style.cursor = 'not-allowed';
  attachmentBtn.title = 'Load a model first';
- return;
- }
-
- const isVision = isVisionModel(currentLoadedModel);
-
- if (isVision) {
- attachmentBtn.style.opacity = '1';
- attachmentBtn.style.cursor = 'pointer';
- attachmentBtn.title = 'Attach images';
  } else {
- attachmentBtn.style.opacity = '0.5';
- attachmentBtn.style.cursor = 'not-allowed';
- attachmentBtn.title = 'Image attachments not supported by this model';
+ const isVision = isVisionModel(currentLoadedModel);
+
+ if (isVision) {
+ attachmentBtn.style.opacity = '1';
+ attachmentBtn.style.cursor = 'pointer';
+ attachmentBtn.title = 'Attach images';
+ } else {
+ attachmentBtn.style.opacity = '0.5';
+ attachmentBtn.style.cursor = 'not-allowed';
+ attachmentBtn.title = 'Image attachments not supported by this model';
+ }
  }
+
+ // Update system message when model state changes
+ displaySystemMessage();
  }

  // Make updateAttachmentButtonState accessible globally
  window.updateAttachmentButtonState = updateAttachmentButtonState;

+ // Make displaySystemMessage accessible globally
+ window.displaySystemMessage = displaySystemMessage;
+
  // Auto-load default model and send message
  async function autoLoadDefaultModelAndSend() {
  // Check if default model is available and installed
@@ -217,6 +226,7 @@ async function autoLoadDefaultModelAndSend() {
  },
  onError: (error, failedModelId) => {
  console.error('Error auto-loading default model:', error);
+ showErrorBanner('Failed to load model: ' + error.message);
  }
  });
  }
@@ -277,7 +287,7 @@ function handleChatInputKeydown(e) {
  if (e.key === 'Escape' && attachedFiles.length > 0) {
  e.preventDefault();
  clearAttachments();
- } else if (e.key === 'Enter') {
+ } else if (e.key === 'Enter' && !e.shiftKey) {
  // Check if we have a loaded model
  if (currentLoadedModel && modelSelect.value !== '' && !modelSelect.disabled) {
  sendMessage();
@@ -438,7 +448,8 @@ function appendMessage(role, text, isMarkdown = false) {
  const bubble = document.createElement('div');
  bubble.className = 'chat-bubble ' + role;

- if (role === 'llm' && isMarkdown) {
+ // Check if isMarkdown is true, regardless of role
+ if (isMarkdown) {
  bubble.innerHTML = renderMarkdownWithThinkTokens(text);
  } else {
  bubble.textContent = text;
@@ -450,6 +461,53 @@ function appendMessage(role, text, isMarkdown = false) {
  return bubble; // Return the bubble element for streaming updates
  }

+ // Display system message based on current state
+ function displaySystemMessage() {
+ // Remove existing system message if it exists
+ if (systemMessageElement) {
+ systemMessageElement.remove();
+ systemMessageElement = null;
+ }
+
+ // Don't show system message if there are already user/LLM messages
+ if (messages.length > 0) {
+ return;
+ }
+
+ let messageText = '';
+
+ // Check if any models are installed
+ const hasInstalledModels = window.installedModels && window.installedModels.size > 0;
+
+ if (!hasInstalledModels) {
+ // No models installed - show first message
+ messageText = `Welcome to Lemonade! To get started:
+ 1. Head over to the Model Management tab.
+ 2. Use the 📥Download button to download a model.
+ 3. Use the 🚀Load button to load the model.
+ 4. Come back to this tab, and you are ready to chat with the model.`;
+ } else if (!currentLoadedModel) {
+ // Models available but none loaded - show second message
+ messageText = 'Welcome to Lemonade! Choose a model from the dropdown menu below to load it and start chatting.';
+ }
+
+ if (messageText) {
+ const div = document.createElement('div');
+ div.className = 'chat-message system';
+ div.setAttribute('data-system-message', 'true');
+
+ const bubble = document.createElement('div');
+ bubble.className = 'chat-bubble system';
+ bubble.textContent = messageText;
+
+ div.appendChild(bubble);
+ chatHistory.appendChild(div);
+ chatHistory.scrollTop = chatHistory.scrollHeight;
+
+ systemMessageElement = div;
+ }
+ }
+
  function updateMessageContent(bubbleElement, text, isMarkdown = false) {
  if (isMarkdown) {
  bubbleElement.innerHTML = renderMarkdownWithThinkTokens(text);
@@ -541,6 +599,12 @@ async function sendMessage() {
  const text = chatInput.value.trim();
  if (!text && attachedFiles.length === 0) return;

+ // Remove system message when user starts chatting
+ if (systemMessageElement) {
+ systemMessageElement.remove();
+ systemMessageElement = null;
+ }
+
  // Check if a model is loaded, if not, automatically load the default model
  if (!currentLoadedModel) {
  const allModels = window.SERVER_MODELS || {};
@@ -624,7 +688,7 @@ async function sendMessage() {
  displayText = displayText ? `${displayText}\n[Images: ${fileNames}]` : `[Images: ${fileNames}]`;
  }

- appendMessage('user', displayText);
+ appendMessage('user', displayText, true);

  // Add to messages array
  const userMessage = {
@@ -59,6 +59,11 @@ async function updateModelStatusIndicator() {
  window.initializeModelDropdown();
  }

+ // Update system message when model status changes
+ if (window.displaySystemMessage) {
+ window.displaySystemMessage();
+ }
+
  // Refresh model management UI if we're on the models tab
  const modelsTab = document.getElementById('content-models');
  if (modelsTab && modelsTab.classList.contains('active')) {
@@ -417,6 +422,7 @@ async function loadModel(modelId) {
  },
  onError: (error, failedModelId) => {
  console.error(`Failed to load model ${failedModelId}:`, error);
+ showErrorBanner('Failed to load model: ' + error.message);
  }
  });
  }
@@ -699,6 +705,11 @@ async function refreshModelMgmtUI() {
  if (window.initializeModelDropdown) {
  window.initializeModelDropdown();
  }
+
+ // Update system message when installed models change
+ if (window.displaySystemMessage) {
+ window.displaySystemMessage();
+ }
  }

  // Make refreshModelMgmtUI globally accessible
@@ -54,6 +54,14 @@ function renderMarkdown(text) {

  // Display an error message in the banner
  function showErrorBanner(msg) {
+ // If DOM isn't ready, wait for it
+ if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', () => {
+ showErrorBanner(msg);
+ });
+ return;
+ }
+
  const banner = document.getElementById('error-banner');
  if (!banner) return;
  const msgEl = document.getElementById('error-banner-msg');
@@ -303,12 +311,12 @@ async function loadModelStandardized(modelId, options = {}) {
  onLoadingEnd(modelId, false);
  }

- // Call error callback or show default error
+ // Call error callback and always show default error banner as fallback
  if (onError) {
  onError(error, modelId);
- } else {
- showErrorBanner('Failed to load model: ' + error.message);
  }
+ // Always show error banner to ensure user sees the error
+ showErrorBanner('Failed to load model: ' + error.message);

  return false;
  }
@@ -157,9 +157,8 @@ body::before {
  margin-bottom: 2em;
  border-radius: 8px;
  border: 1px solid #e0e0e0;
- max-width: 1000px;
  min-width: 320px;
- width: calc(100% - 2rem); /* Responsive width with margin */
+ width: 100%;
  margin-left: 1rem;
  margin-right: 1rem;
  }
@@ -327,15 +326,16 @@ body::before {
  .chat-container {
  display: flex;
  flex-direction: column;
- height: calc(100vh - 650px); /* Subtract space for navbar, title, wall of logos, etc */
  min-height: 300px;
- max-height: 1200px;
- max-width: 800px;
+ min-width: 300px;
+ max-width: 100%;
  width: 100%;
  margin: 0 auto;
  border: 1px solid #e0e0e0;
  border-radius: 8px;
  background: #fff;
+ resize: both;
+ overflow: auto;
  }

  .chat-history {
@@ -388,6 +388,21 @@ body::before {
  align-self: flex-start;
  }

+ .chat-message.system {
+ align-items: flex-start;
+ }
+
+ .chat-bubble.system {
+ background: linear-gradient(135deg, #f0f8f0 0%, #e8f5e8 100%);
+ color: #2d7f47;
+ border-bottom-left-radius: 4px;
+ align-self: flex-start;
+ border: 1px solid #c8e6c9;
+ font-style: normal;
+ font-weight: 500;
+ box-shadow: 0 1px 3px rgba(45, 127, 71, 0.1);
+ }
+
  /* Markdown styling within chat bubbles */
  .chat-bubble h1,
  .chat-bubble h2,
@@ -570,7 +585,7 @@ body::before {
  align-items: center;
  }

- .input-with-indicator input[type='text'] {
+ #chat-input {
  flex: 1;
  padding: 0.5em;
  border: 1px solid #ddd;
@@ -578,6 +593,16 @@ body::before {
  background: #fff;
  color: #222;
  margin: 0;
+ resize: vertical;
+ min-height: 40px;
+ font-family: inherit;
+ }
+
+ /* Update placeholder style */
+ #chat-input::placeholder {
+ color: #aaa;
+ opacity: 1;
+ font-style: italic;
  }

  #attachment-indicator {
@@ -1897,7 +1922,6 @@ body::before {

  .category-content {
  display: none;
- padding: 0;
  }

  .category-content.expanded {
@@ -52,7 +52,7 @@
  <option value="">Pick a model</option>
  </select>
  <div class="input-with-indicator">
- <input type="text" id="chat-input" placeholder="Type your message..." />
+ <textarea id="chat-input" placeholder="Type your message..." rows="1"></textarea>
  </div>
  <input type="file" id="file-attachment" style="display: none;" multiple accept="image/*">
  <button id="attachment-btn" title="Attach files">&#x1F4CE;</button>
@@ -92,9 +92,9 @@
  <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/lm_eval.png" alt="LM-Eval" class="app-logo-img">
  <span class="app-name">LM-Eval</span>
  </a>
- <a href="https://lemonade-server.ai/docs/server/apps/codeGPT/" target="_blank" class="app-logo-item" title="CodeGPT">
- <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/codegpt.jpg" alt="CodeGPT" class="app-logo-img">
- <span class="app-name">CodeGPT</span>
+ <a href="https://github.com/lemonade-sdk/lemonade-arcade" target="_blank" class="app-logo-item" title="Lemonade Arcade">
+ <img src="https://raw.githubusercontent.com/lemonade-sdk/lemonade-arcade/refs/heads/main/docs/assets/favicon.ico" alt="Lemonade Arcade" class="app-logo-img">
+ <span class="app-name">Lemonade Arcade</span>
  </a>
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/server/apps/ai-toolkit.md" target="_blank" class="app-logo-item" title="AI Toolkit">
  <img src="https://raw.githubusercontent.com/lemonade-sdk/assets/refs/heads/main/partner_logos/ai_toolkit.png" alt="AI Toolkit" class="app-logo-img">
@@ -81,6 +81,7 @@ class WrappedServer(ABC):
  self.process: subprocess.Popen = None
  self.server_name: str = server_name
  self.telemetry: WrappedServerTelemetry = telemetry
+ self.log_thread_exception = None

  def choose_port(self):
  """
@@ -192,6 +193,8 @@ class WrappedServer(ABC):

  if self.process.poll() is not None:
  break
+ except HTTPException as e:
+ self.log_thread_exception = e
  except UnicodeDecodeError as e:
  logging.debug(
  "Unicode decode error reading subprocess output: %s", str(e)
@@ -217,6 +220,11 @@ class WrappedServer(ABC):
  )
  time.sleep(1)

+ if self.log_thread_exception:
+ e = self.log_thread_exception
+ self.log_thread_exception = None
+ raise e
+
  @abstractmethod
  def _launch_server_subprocess(
  self,
lemonade/version.py CHANGED
@@ -1 +1 @@
- __version__ = "8.1.6"
+ __version__ = "8.1.8"
@@ -48,10 +48,6 @@ NPU_DRIVER_DOWNLOAD_URL = (
  REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"

  lemonade_install_dir = Path(__file__).parent.parent.parent
- DEFAULT_QUARK_VERSION = "quark-0.6.0"
- DEFAULT_QUARK_DIR = os.path.join(
- lemonade_install_dir, "install", "quark", DEFAULT_QUARK_VERSION
- )

  # List of supported Ryzen AI processor series (can be extended in the future)
  SUPPORTED_RYZEN_AI_SERIES = ["300"]
@@ -177,7 +173,7 @@ def _get_ryzenai_version_info(device=None):
  f"{e}\n Please install lemonade-sdk with "
  "one of the oga extras, for example:\n"
  "pip install lemonade-sdk[dev,oga-cpu]\n"
- "See https://lemonade_server.ai/install_options.html for details"
+ "See https://lemonade-server.ai/install_options.html for details"
  ) from e

@@ -445,18 +441,18 @@ class Install:
  "variable (e.g., Ryzen AI uses environment variable OGA_TOKEN).",
  )

- parser.add_argument(
- "--quark",
- help="Install Quark Quantization tool for LLMs",
- choices=["0.6.0"],
- )
-
  parser.add_argument(
  "--llamacpp",
  help="Install llama.cpp binaries with specified backend",
  choices=["rocm", "vulkan"],
  )

+ parser.add_argument(
+ "--override",
+ action="store_true",
+ help="Override the deprecation error to use legacy tools.",
+ )
+
  return parser

  @staticmethod
@@ -637,7 +633,7 @@ class Install:
  return file

  @staticmethod
- def _install_ryzenai(ryzenai, build_model, yes, token):
+ def _install_ryzenai(ryzenai, build_model, yes, token, override=False):
  # Check if the processor is supported before proceeding
  check_ryzen_ai_processor()

@@ -654,6 +650,9 @@ class Install:
  + "=" * 80
  + "\n"
  )
+ if not override:
+ raise ValueError(warning_msg)
+
  print(warning_msg)
  # Delete any previous Ryzen AI installation in this environment

@@ -715,36 +714,6 @@ class Install:
  except IOError as e:
  print(f"An error occurred while writing {version_info_path}: {e}")

- @staticmethod
- def _install_quark(quark):
- quark_install_dir = os.path.join(lemonade_install_dir, "install", "quark")
- os.makedirs(quark_install_dir, exist_ok=True)
-
- # Install Quark utilities
- quark_url = (
- f"https://www.xilinx.com/bin/public/openDownload?filename=quark-{quark}.zip"
- )
- quark_path = download_and_extract_package(
- url=quark_url,
- version=quark,
- install_dir=quark_install_dir,
- package_name="quark",
- )
- # Install Quark wheel
- wheel_url = (
- "https://www.xilinx.com/bin/public/openDownload?"
- f"filename=quark-{quark}-py3-none-any.whl"
- )
- wheel_path = os.path.join(quark_install_dir, f"quark-{quark}-py3-none-any.whl")
- print(f"\nInstalling Quark wheel from {wheel_url}")
- download_file(wheel_url, wheel_path, "wheel file")
-
- install_cmd = f"{sys.executable} -m pip install --no-deps {wheel_path}"
- subprocess.run(install_cmd, check=True, shell=True)
- os.remove(wheel_path)
-
- print(f"\nQuark installed successfully at: {quark_path}")
-
  @staticmethod
  def _install_llamacpp(backend):
  """
@@ -762,22 +731,19 @@ class Install:
  self,
  ryzenai: Optional[str] = None,
  build_model: Optional[str] = None,
- quark: Optional[str] = None,
  llamacpp: Optional[str] = None,
  yes: bool = False,
  token: Optional[str] = None,
+ override: bool = False,
  ):
- if ryzenai is None and quark is None and llamacpp is None:
+ if ryzenai is None and llamacpp is None:
  raise ValueError(
  "You must select something to install, "
- "for example `--ryzenai`, `--quark`, or `--llamacpp`"
+ "for example `--ryzenai` or `--llamacpp`"
  )

  if ryzenai is not None:
- self._install_ryzenai(ryzenai, build_model, yes, token)
-
- if quark is not None:
- self._install_quark(quark)
+ self._install_ryzenai(ryzenai, build_model, yes, token, override)

  if llamacpp is not None:
  self._install_llamacpp(llamacpp)