lemonade-sdk 8.0.5__py3-none-any.whl → 8.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/cache.py +3 -1
- lemonade/tools/adapter.py +6 -0
- lemonade/tools/huggingface/utils.py +6 -5
- lemonade/tools/llamacpp/bench.py +26 -46
- lemonade/tools/llamacpp/load.py +104 -196
- lemonade/tools/llamacpp/utils.py +612 -0
- lemonade/tools/oga/bench.py +5 -6
- lemonade/tools/oga/utils.py +8 -2
- lemonade/tools/prompt.py +17 -25
- lemonade/tools/report/table.py +12 -9
- lemonade/tools/server/llamacpp.py +80 -92
- lemonade/tools/server/serve.py +3 -0
- lemonade/tools/server/static/styles.css +116 -20
- lemonade/tools/server/static/webapp.html +11 -6
- lemonade/tools/server/tray.py +7 -0
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/METADATA +3 -3
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/RECORD +25 -24
- lemonade_server/model_manager.py +4 -148
- lemonade_server/server_models.json +11 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.0.6.dist-info}/top_level.txt +0 -0
lemonade/tools/prompt.py
CHANGED
@@ -161,7 +161,11 @@ class LLMPrompt(Tool):
         # If template flag is set, then wrap prompt in template
         if template:
             # Embed prompt in model's chat template
-            if tokenizer
+            if not hasattr(tokenizer, "prompt_template"):
+                printing.log_warning(
+                    "Templates for this model type are not yet implemented."
+                )
+            elif tokenizer.chat_template:
                 # Use the model's built-in chat template if available
                 messages_dict = [{"role": "user", "content": prompt}]
                 prompt = tokenizer.apply_chat_template(
@@ -175,25 +179,10 @@ class LLMPrompt(Tool):
            state.save_stat(Keys.PROMPT_TEMPLATE, "Default")

        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
-        if isinstance(input_ids, (list, str)):
-            # OGA models return a list of tokens (older versions)
-            # Our llama.cpp adapter returns a string
-            len_tokens_in = len(input_ids)
-        elif hasattr(input_ids, "shape"):
-            # HF models return a 2-D tensor
-            # OGA models with newer versions may return numpy arrays
-            if len(input_ids.shape) == 1:
-                # 1-D array from newer OGA versions
-                len_tokens_in = len(input_ids)
-            else:
-                # 2-D tensor from HF models
-                len_tokens_in = input_ids.shape[1]
-        else:
-            # Fallback: try to get length directly
-            len_tokens_in = len(input_ids)

        len_tokens_out = []
        response_texts = []
+        prompt_tokens = None  # will be determined in generate function
        for trial in range(n_trials):
            if n_trials > 1:
                self.set_percent_progress(100.0 * trial / n_trials)
@@ -222,19 +211,22 @@ class LLMPrompt(Tool):

            response_array = response if isinstance(response, str) else response[0]

-
-            len_tokens_out.append(
+            prompt_tokens = model.prompt_tokens
+            len_tokens_out.append(model.response_tokens)

-
+            # Remove the input from the response
+            # (up to the point they diverge, which they should not)
+            counter = 0
+            len_input_ids = len(input_ids_array)
            while (
-
-                and input_ids_array[
+                counter < len_input_ids
+                and input_ids_array[counter] == response_array[counter]
            ):
-
+                counter += 1

            # Only decode the actual response (not the prompt)
            response_text = tokenizer.decode(
-                response_array[
+                response_array[counter:], skip_special_tokens=True
            ).strip()
            response_texts.append(response_text)

@@ -259,7 +251,7 @@ class LLMPrompt(Tool):
        plt.savefig(figure_path)
        state.save_stat(Keys.RESPONSE_LENGTHS_HISTOGRAM, figure_path)

-        state.save_stat(Keys.PROMPT_TOKENS,
+        state.save_stat(Keys.PROMPT_TOKENS, prompt_tokens)
        state.save_stat(Keys.PROMPT, prompt)
        state.save_stat(Keys.RESPONSE_TOKENS, len_tokens_out)
        state.save_stat(Keys.RESPONSE, sanitize_text(response_texts))
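Net effect of the prompt.py changes: LLMPrompt no longer infers the prompt length from input_ids (which needed per-backend handling of lists, strings, and 1-D/2-D arrays); it reads both counts from the adapter after each generation. A minimal sketch of the new accounting, assuming only the prompt_tokens and response_tokens attributes shown in the diff; the adapter object and the generate() call are illustrative:

    # Illustrative sketch; `adapter` stands in for the loaded model wrapper.
    def run_trials(adapter, tokenizer, prompt, n_trials=3):
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        prompt_tokens = None  # reported by the adapter during generate()
        response_token_counts = []
        for _ in range(n_trials):
            adapter.generate(input_ids)  # signature is illustrative
            prompt_tokens = adapter.prompt_tokens
            response_token_counts.append(adapter.response_tokens)
        return prompt_tokens, response_token_counts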
lemonade/tools/report/table.py
CHANGED
@@ -758,15 +758,18 @@ class LemonadePerfTable(Table):
            data[key] = model_stats.get(key, "")

        # Create a new entry with Driver Versions and relevant Python Packages
-        sw_versions = [
-
-
-
-
-
-
-
-
+        sw_versions = []
+        if "Driver Versions" in data[fs.Keys.SYSTEM_INFO]:
+            sw_versions += [
+                key + ": " + value
+                for key, value in data[fs.Keys.SYSTEM_INFO]["Driver Versions"].items()
+            ]
+        if "Python Packages" in data[fs.Keys.SYSTEM_INFO]:
+            sw_versions += [
+                pkg
+                for pkg in data[fs.Keys.SYSTEM_INFO]["Python Packages"]
+                if any(name in pkg for name in PYTHON_PACKAGES)
+            ]
        if isinstance(data[Keys.RYZEN_AI_VERSION_INFO], dict):
            sw_versions += [
                "Ryzen AI: " + value
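The rewritten block in table.py only collects driver and package versions when those keys are actually present in the system-info stats, so reports from systems that lack them no longer fail on a missing key. A standalone sketch of the same guarded lookup; the system_info contents and PYTHON_PACKAGES values below are made up for illustration:

    PYTHON_PACKAGES = ["transformers", "onnxruntime"]  # illustrative values
    system_info = {"Python Packages": ["transformers==4.53.2", "numpy==2.0.1"]}

    sw_versions = []
    if "Driver Versions" in system_info:
        sw_versions += [f"{k}: {v}" for k, v in system_info["Driver Versions"].items()]
    if "Python Packages" in system_info:
        sw_versions += [
            pkg for pkg in system_info["Python Packages"]
            if any(name in pkg for name in PYTHON_PACKAGES)
        ]
    print(sw_versions)  # ['transformers==4.53.2']; absent keys are simply skipped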
lemonade/tools/server/llamacpp.py
CHANGED
@@ -1,13 +1,11 @@
-import sys
 import os
+import sys
 import logging
 import time
 import subprocess
-import zipfile
 import re
 import threading
 import platform
-import shutil

 import requests
 from tabulate import tabulate
@@ -18,12 +16,18 @@ from openai import OpenAI

from lemonade_server.pydantic_models import (
    ChatCompletionRequest,
+    CompletionRequest,
    PullConfig,
    EmbeddingsRequest,
    RerankingRequest,
)
from lemonade_server.model_manager import ModelManager
from lemonade.tools.server.utils.port import find_free_port
+from lemonade.tools.llamacpp.utils import (
+    get_llama_server_exe_path,
+    install_llamacpp,
+    download_gguf,
+)

LLAMA_VERSION = "b5787"

@@ -80,39 +84,6 @@ def get_binary_url_and_filename(version):
    return url, filename


-def validate_platform_support():
-    """
-    Validate platform support before attempting download
-    """
-    system = platform.system().lower()
-
-    if system not in ["windows", "linux"]:
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=(
-                f"Platform {system} not supported for llamacpp. "
-                "Supported: Windows, Ubuntu Linux"
-            ),
-        )
-
-    if system == "linux":
-        # Check if we're actually on Ubuntu/compatible distro and log a warning if not
-        try:
-            with open("/etc/os-release", "r", encoding="utf-8") as f:
-                os_info = f.read().lower()
-            if "ubuntu" not in os_info and "debian" not in os_info:
-                logging.warning(
-                    "llamacpp binaries are built for Ubuntu. "
-                    "Compatibility with other Linux distributions is not guaranteed."
-                )
-        except (FileNotFoundError, PermissionError, OSError) as e:
-            logging.warning(
-                "Could not determine Linux distribution (%s). "
-                "llamacpp binaries are built for Ubuntu.",
-                str(e),
-            )
-
-
class LlamaTelemetry:
    """
    Manages telemetry data collection and display for llama server.
@@ -283,7 +254,7 @@ def _launch_llama_subprocess(
    """

    # Get the current executable path (handles both Windows and Ubuntu structures)
-
+    exe_path = get_llama_server_exe_path()

    # Build the base command
    base_command = [exe_path, "-m", snapshot_files["variant"]]
@@ -350,68 +321,23 @@ def _launch_llama_subprocess(


def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
-    #
-
+    # Install and/or update llama.cpp if needed
+    try:
+        install_llamacpp()
+    except NotImplementedError as e:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
+        )

    # Get platform-specific paths at runtime
-
-
-    # Check whether the llamacpp install needs an upgrade
-    version_txt_path = os.path.join(llama_server_exe_dir, "version.txt")
-    if os.path.exists(version_txt_path):
-        with open(version_txt_path, "r", encoding="utf-8") as f:
-            llamacpp_installed_version = f.read()
-
-        if llamacpp_installed_version != LLAMA_VERSION:
-            # Remove the existing install, which will trigger a new install
-            # in the next code block
-            shutil.rmtree(llama_server_exe_dir)
-
-    # Download llama.cpp server if it isn't already available
-    if not os.path.exists(llama_server_exe_dir):
-        # Download llama.cpp server zip
-        llama_zip_url, filename = get_binary_url_and_filename(LLAMA_VERSION)
-        llama_zip_path = os.path.join(os.path.dirname(sys.executable), filename)
-        logging.info(f"Downloading llama.cpp server from {llama_zip_url}")
-
-        with requests.get(llama_zip_url, stream=True) as r:
-            r.raise_for_status()
-            with open(llama_zip_path, "wb") as f:
-                for chunk in r.iter_content(chunk_size=8192):
-                    f.write(chunk)
-
-        # Extract zip
-        logging.info(f"Extracting {llama_zip_path} to {llama_server_exe_dir}")
-        with zipfile.ZipFile(llama_zip_path, "r") as zip_ref:
-            zip_ref.extractall(llama_server_exe_dir)
-
-        # Make executable on Linux - need to update paths after extraction
-        if platform.system().lower() == "linux":
-            # Re-get the paths since extraction might have changed the directory structure
-            _, updated_exe_path = get_llama_server_paths()
-            if os.path.exists(updated_exe_path):
-                os.chmod(updated_exe_path, 0o755)
-                logging.info(f"Set executable permissions for {updated_exe_path}")
-            else:
-                logging.warning(
-                    f"Could not find llama-server executable at {updated_exe_path}"
-                )
-
-        # Save version.txt
-        with open(version_txt_path, "w", encoding="utf-8") as vf:
-            vf.write(LLAMA_VERSION)
-
-        # Delete zip file
-        os.remove(llama_zip_path)
-        logging.info("Cleaned up zip file")
+    llama_server_exe_path = get_llama_server_exe_path()

    # Download the gguf to the hugging face cache
-
-    snapshot_files = model_manager.download_gguf(model_config)
+    snapshot_files = download_gguf(model_config.checkpoint, model_config.mmproj)
    logging.debug(f"GGUF file paths: {snapshot_files}")

    # Check if model supports embeddings
-    supported_models =
+    supported_models = ModelManager().supported_models
    model_info = supported_models.get(model_config.model_name, {})
    supports_embeddings = "embeddings" in model_info.get("labels", [])
    supports_reranking = "reranking" in model_info.get("labels", [])
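With this hunk, server_load delegates the platform check, versioned download, and extraction that previously lived inline to the new lemonade.tools.llamacpp.utils module. A rough sketch of the resulting flow, using only the helper names and call shapes visible in this diff (install_llamacpp, get_llama_server_exe_path, download_gguf); the wrapper function and its arguments are illustrative, and the actual subprocess launch is elided:

    from fastapi import HTTPException, status
    from lemonade.tools.llamacpp.utils import (
        get_llama_server_exe_path,
        install_llamacpp,
        download_gguf,
    )

    def prepare_llamacpp(checkpoint: str, mmproj: str | None = None) -> list:
        try:
            install_llamacpp()  # installs or upgrades the pinned llama.cpp build
        except NotImplementedError as e:
            # Unsupported platforms now surface as a 422 up front
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
            )
        exe_path = get_llama_server_exe_path()
        snapshot_files = download_gguf(checkpoint, mmproj)  # cached in the HF cache
        # Same base command shape that _launch_llama_subprocess builds
        return [exe_path, "-m", snapshot_files["variant"]]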
@@ -523,6 +449,68 @@ def chat_completion(
    )


+def completion(completion_request: CompletionRequest, telemetry: LlamaTelemetry):
+    """
+    Handle text completions using the llamacpp server.
+
+    Args:
+        completion_request: The completion request containing prompt and parameters
+        telemetry: Telemetry object containing the server port
+
+    Returns:
+        Completion response from the llamacpp server
+    """
+    base_url = llamacpp_address(telemetry.port)
+    client = OpenAI(
+        base_url=base_url,
+        api_key="lemonade",
+    )
+
+    # Convert Pydantic model to dict and remove unset/null values
+    request_dict = completion_request.model_dump(exclude_unset=True, exclude_none=True)
+
+    # Check if streaming is requested
+    if completion_request.stream:
+
+        def event_stream():
+            try:
+                # Enable streaming
+                for chunk in client.completions.create(**request_dict):
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+                yield "data: [DONE]\n\n"
+
+                # Show telemetry after completion
+                telemetry.show_telemetry()
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                yield f'data: {{"error": "{str(e)}"}}\n\n'
+
+        return StreamingResponse(
+            event_stream(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            },
+        )
+    else:
+        # Non-streaming response
+        try:
+            # Disable streaming for non-streaming requests
+            response = client.completions.create(**request_dict)
+
+            # Show telemetry after completion
+            telemetry.show_telemetry()
+
+            return response
+
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Completion error: {str(e)}",
+            )
+
+
def embeddings(embeddings_request: EmbeddingsRequest, telemetry: LlamaTelemetry):
    """
    Generate embeddings using the llamacpp server.
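Together with the serve.py routing below, this new handler means plain /completions requests now reach llamacpp-backed models the same way chat completions already did. A client-side sketch, assuming Lemonade Server is running at its default local address with a GGUF model installed; the base_url and model name are assumptions to adjust for your setup:

    from openai import OpenAI

    # base_url and model name are placeholders for your local install
    client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="lemonade")

    # Non-streaming: completion() forwards this to llama-server and returns it whole
    result = client.completions.create(model="Qwen2.5-0.5B-Instruct-GGUF", prompt="Say hi")
    print(result.choices[0].text)

    # Streaming: chunks are re-emitted as server-sent events, ending with [DONE]
    for chunk in client.completions.create(
        model="Qwen2.5-0.5B-Instruct-GGUF", prompt="Say hi", stream=True
    ):
        print(chunk.choices[0].text, end="", flush=True)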
lemonade/tools/server/serve.py
CHANGED
@@ -487,6 +487,9 @@ class Server(ManagementTool):
        # Load the model if it's different from the currently loaded one
        await self.load_llm(lc)

+        if self.llm_loaded.recipe == "llamacpp":
+            return llamacpp.completion(completion_request, self.llama_telemetry)
+
        # Check if the model supports reasoning
        reasoning_first_token = self.llm_loaded.reasoning

lemonade/tools/server/static/styles.css
CHANGED
@@ -1,33 +1,93 @@
+/* === CSS Variables === */
+:root {
+  /* Colors */
+  --primary-yellow: #ffe066;
+  --primary-yellow-dark: #ffd43b;
+  --accent-gold: #e6b800;
+  --accent-gold-dark: #bfa100;
+
+  --text-primary: #222;
+  --text-secondary: #555;
+  --text-muted: #666;
+
+  --bg-primary: #fffbe9;
+  --bg-secondary: #fff8dd;
+  --bg-tertiary: #fff5d1;
+
+  /* Transitions */
+  --transition-fast: 0.2s ease;
+  --transition-medium: 0.3s ease;
+}
+
 body {
   margin: 0;
   font-family: 'Segoe UI', 'Arial', sans-serif;
-  background:
-  color:
+  background: linear-gradient(135deg, var(--bg-primary) 0%, var(--bg-secondary) 50%, var(--bg-tertiary) 100%);
+  color: var(--text-primary);
   min-height: 100vh;
   display: flex;
   flex-direction: column;
   padding-bottom: 5rem;
 }

+body::before {
+  content: '';
+  position: fixed;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
+  background:
+    radial-gradient(circle at 20% 20%, rgba(255, 224, 102, 0.1) 0%, transparent 50%),
+    radial-gradient(circle at 80% 80%, rgba(255, 212, 59, 0.1) 0%, transparent 50%);
+  pointer-events: none;
+  z-index: -1;
+}
+
 .navbar {
   display: flex;
-  justify-content:
-
-  padding:
+  justify-content: space-between;
+  align-items: center;
+  padding: 1rem 3rem 0.5rem 1rem;
   font-size: 1.25rem;
   font-weight: 500;
   background: transparent;
   letter-spacing: 0.02em;
+  position: relative;
+  transition: var(--transition-medium);
+}
+
+.navbar-brand {
+  display: flex;
+  align-items: center;
 }

-.
+.brand-title {
+  font-size: 1.5rem;
+  font-weight: 700;
+  color: var(--text-primary);
+  text-decoration: none;
+  letter-spacing: 0.01em;
+}
+
+.brand-title a {
+  color: inherit;
+  text-decoration: none;
+}
+
+.navbar-links {
+  display: flex;
+  gap: 2.5rem;
+}
+
+.navbar-links a {
   color: #444;
   text-decoration: none;
-  transition:
+  transition: var(--transition-fast);
 }

-.navbar a:hover {
-  color:
+.navbar-links a:hover {
+  color: var(--accent-gold);
 }

 .main {
@@ -37,16 +97,8 @@ body {
   align-items: center;
   justify-content: flex-start;
   min-height: 60vh;
-  margin-top:
-
-
-.title {
-  font-size: 3rem;
-  font-weight: 700;
-  margin-bottom: 2.5rem;
-  letter-spacing: 0.01em;
-  text-align: center;
-  color: #222;
+  margin-top: 2rem;
+  padding-top: 1rem;
 }

 .site-footer {
@@ -54,7 +106,7 @@ body {
   left: 0;
   bottom: 0;
   width: 100%;
-  background
+  background: transparent;
   padding-top: 0.5rem;
   z-index: 100;
 }
@@ -983,6 +1035,50 @@ body {
   }
 }

+/* === Responsive Navbar === */
+@media (max-width: 800px) {
+  .navbar {
+    flex-direction: column;
+    gap: 1rem;
+    padding: 1rem 1rem 0.5rem 1rem;
+    align-items: center;
+  }
+
+  .navbar-brand {
+    margin-bottom: 0.5rem;
+  }
+
+  .brand-title {
+    font-size: 1.3rem;
+  }
+
+  .navbar-links {
+    gap: 1.5rem;
+    font-size: 1rem;
+  }
+}
+
+@media (max-width: 600px) {
+  .navbar {
+    padding: 0.5rem 0.5rem 0.25rem 0.5rem;
+  }
+
+  .brand-title {
+    font-size: 1.2rem;
+  }
+
+  .navbar-links {
+    gap: 1rem;
+    font-size: 0.9rem;
+    flex-wrap: wrap;
+    justify-content: center;
+  }
+
+  .main {
+    margin-top: 0.5rem;
+  }
+}
+
 /* Ensure form container allows tooltip overflow */
 .model-mgmt-register-form {
   position: relative;
lemonade/tools/server/static/webapp.html
CHANGED
@@ -12,14 +12,19 @@
    {{SERVER_MODELS_JS}}
</head>
<body>
-    <nav class="navbar">
-        <
-
-
-        <
+    <nav class="navbar" id="navbar">
+        <div class="navbar-brand">
+            <span class="brand-title"><a href="https://lemonade-server.ai">🍋 Lemonade Server</a></span>
+        </div>
+        <div class="navbar-links">
+            <a href="https://github.com/lemonade-sdk/lemonade" target="_blank">GitHub</a>
+            <a href="https://lemonade-server.ai/docs/" target="_blank">Docs</a>
+            <a href="https://lemonade-server.ai/docs/server/server_models/" target="_blank">Models</a>
+            <a href="https://lemonade-server.ai/docs/server/apps/" target="_blank">Featured Apps</a>
+            <a href="https://lemonade-server.ai/news/" target="_blank">News</a>
+        </div>
    </nav>
    <main class="main">
-        <div class="title">🍋 Lemonade Server</div>
        <div class="tab-container">
            <div class="tabs">
                <button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>
lemonade/tools/server/tray.py
CHANGED
@@ -87,8 +87,15 @@ class LemonadeTray(SystemTray):
        Update the latest version information.
        """
        try:
+            # Prepare headers for GitHub API request
+            headers = {}
+            github_token = os.environ.get("GITHUB_TOKEN")
+            if github_token:
+                headers["Authorization"] = f"token {github_token}"
+
            response = requests.get(
                "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
+                headers=headers,
                timeout=10,  # Add timeout to prevent hanging
            )
            response.raise_for_status()
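The tray's update check now attaches an Authorization header whenever a GITHUB_TOKEN environment variable is set, which raises the GitHub API rate limit for authenticated requests; without the variable the request is sent unauthenticated as before. The same pattern in isolation:

    import os
    import requests

    headers = {}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"token {token}"  # authenticated => higher rate limit

    resp = requests.get(
        "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
        headers=headers,
        timeout=10,
    )
    resp.raise_for_status()
    print(resp.json()["tag_name"])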
lemonade/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "8.0.5"
+__version__ = "8.0.6"
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.
|
|
3
|
+
Version: 8.0.6
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.13
|
|
@@ -23,7 +23,7 @@ Requires-Dist: zstandard
|
|
|
23
23
|
Requires-Dist: fastapi
|
|
24
24
|
Requires-Dist: uvicorn[standard]
|
|
25
25
|
Requires-Dist: openai>=1.81.0
|
|
26
|
-
Requires-Dist: transformers<=4.
|
|
26
|
+
Requires-Dist: transformers<=4.53.2
|
|
27
27
|
Requires-Dist: jinja2
|
|
28
28
|
Requires-Dist: tabulate
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
@@ -284,7 +284,7 @@ New contributors can find beginner-friendly issues tagged with "Good First Issue
|
|
|
284
284
|
|
|
285
285
|
## Maintainers
|
|
286
286
|
|
|
287
|
-
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues),
|
|
287
|
+
This project is sponsored by AMD. It is maintained by @danielholanda @jeremyfowers @ramkrishna @vgodsoe in equal measure. You can reach us by filing an [issue](https://github.com/lemonade-sdk/lemonade/issues), emailing [lemonade@amd.com](mailto:lemonade@amd.com), or joining our [Discord](https://discord.gg/5xXzkMu8Zk).
|
|
288
288
|
|
|
289
289
|
## License
|
|
290
290
|
|