lemonade-sdk 8.0.5__py3-none-any.whl → 8.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic.
- lemonade/cache.py +3 -1
- lemonade/common/network.py +18 -1
- lemonade/tools/adapter.py +6 -0
- lemonade/tools/huggingface/utils.py +6 -5
- lemonade/tools/llamacpp/bench.py +28 -46
- lemonade/tools/llamacpp/load.py +104 -196
- lemonade/tools/llamacpp/utils.py +612 -0
- lemonade/tools/oga/bench.py +5 -6
- lemonade/tools/oga/load.py +239 -112
- lemonade/tools/oga/utils.py +27 -9
- lemonade/tools/prompt.py +17 -25
- lemonade/tools/report/table.py +12 -9
- lemonade/tools/server/llamacpp.py +80 -92
- lemonade/tools/server/serve.py +22 -28
- lemonade/tools/server/static/styles.css +121 -26
- lemonade/tools/server/static/webapp.html +14 -6
- lemonade/tools/server/tray.py +7 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +65 -84
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/METADATA +32 -21
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/RECORD +30 -29
- lemonade_server/cli.py +1 -1
- lemonade_server/model_manager.py +8 -151
- lemonade_server/pydantic_models.py +1 -4
- lemonade_server/server_models.json +44 -9
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.5.dist-info → lemonade_sdk-8.1.0.dist-info}/top_level.txt +0 -0
lemonade/tools/report/table.py
CHANGED
@@ -758,15 +758,18 @@ class LemonadePerfTable(Table):
             data[key] = model_stats.get(key, "")

         # Create a new entry with Driver Versions and relevant Python Packages
-        sw_versions = [
-
-
-
-
-
-
-
-
+        sw_versions = []
+        if "Driver Versions" in data[fs.Keys.SYSTEM_INFO]:
+            sw_versions += [
+                key + ": " + value
+                for key, value in data[fs.Keys.SYSTEM_INFO]["Driver Versions"].items()
+            ]
+        if "Python Packages" in data[fs.Keys.SYSTEM_INFO]:
+            sw_versions += [
+                pkg
+                for pkg in data[fs.Keys.SYSTEM_INFO]["Python Packages"]
+                if any(name in pkg for name in PYTHON_PACKAGES)
+            ]
         if isinstance(data[Keys.RYZEN_AI_VERSION_INFO], dict):
             sw_versions += [
                 "Ryzen AI: " + value
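The change above makes the software-versions column tolerant of reports whose system info lacks "Driver Versions" or "Python Packages". A minimal sketch of the new behavior, assuming a SYSTEM_INFO-shaped dict; the PYTHON_PACKAGES filter list and sample values here are illustrative, not taken from the release:

```python
# Sketch of the new sw_versions construction (not the shipped code).
# PYTHON_PACKAGES is a hypothetical list of package-name substrings to keep.
PYTHON_PACKAGES = ["onnxruntime", "torch"]

def build_sw_versions(system_info: dict) -> list:
    sw_versions = []
    if "Driver Versions" in system_info:
        sw_versions += [
            key + ": " + value
            for key, value in system_info["Driver Versions"].items()
        ]
    if "Python Packages" in system_info:
        sw_versions += [
            pkg
            for pkg in system_info["Python Packages"]
            if any(name in pkg for name in PYTHON_PACKAGES)
        ]
    return sw_versions

print(build_sw_versions({
    "Driver Versions": {"NPU": "32.0.203.240"},
    "Python Packages": ["onnxruntime==1.18.0", "numpy==1.26.4"],
}))
# ['NPU: 32.0.203.240', 'onnxruntime==1.18.0']
print(build_sw_versions({}))  # [] -- missing keys no longer break the table
```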
lemonade/tools/server/llamacpp.py
CHANGED

@@ -1,13 +1,11 @@
-import sys
 import os
+import sys
 import logging
 import time
 import subprocess
-import zipfile
 import re
 import threading
 import platform
-import shutil

 import requests
 from tabulate import tabulate

@@ -18,12 +16,18 @@ from openai import OpenAI

 from lemonade_server.pydantic_models import (
     ChatCompletionRequest,
+    CompletionRequest,
     PullConfig,
     EmbeddingsRequest,
     RerankingRequest,
 )
 from lemonade_server.model_manager import ModelManager
 from lemonade.tools.server.utils.port import find_free_port
+from lemonade.tools.llamacpp.utils import (
+    get_llama_server_exe_path,
+    install_llamacpp,
+    download_gguf,
+)

 LLAMA_VERSION = "b5787"

@@ -80,39 +84,6 @@ def get_binary_url_and_filename(version):
     return url, filename


-def validate_platform_support():
-    """
-    Validate platform support before attempting download
-    """
-    system = platform.system().lower()
-
-    if system not in ["windows", "linux"]:
-        raise HTTPException(
-            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
-            detail=(
-                f"Platform {system} not supported for llamacpp. "
-                "Supported: Windows, Ubuntu Linux"
-            ),
-        )
-
-    if system == "linux":
-        # Check if we're actually on Ubuntu/compatible distro and log a warning if not
-        try:
-            with open("/etc/os-release", "r", encoding="utf-8") as f:
-                os_info = f.read().lower()
-            if "ubuntu" not in os_info and "debian" not in os_info:
-                logging.warning(
-                    "llamacpp binaries are built for Ubuntu. "
-                    "Compatibility with other Linux distributions is not guaranteed."
-                )
-        except (FileNotFoundError, PermissionError, OSError) as e:
-            logging.warning(
-                "Could not determine Linux distribution (%s). "
-                "llamacpp binaries are built for Ubuntu.",
-                str(e),
-            )
-
-
 class LlamaTelemetry:
     """
     Manages telemetry data collection and display for llama server.

@@ -283,7 +254,7 @@ def _launch_llama_subprocess(
     """

     # Get the current executable path (handles both Windows and Ubuntu structures)
-
+    exe_path = get_llama_server_exe_path()

     # Build the base command
     base_command = [exe_path, "-m", snapshot_files["variant"]]

@@ -350,68 +321,23 @@ def _launch_llama_subprocess(


 def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
-    #
-
+    # Install and/or update llama.cpp if needed
+    try:
+        install_llamacpp()
+    except NotImplementedError as e:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
+        )

     # Get platform-specific paths at runtime
-
-
-    # Check whether the llamacpp install needs an upgrade
-    version_txt_path = os.path.join(llama_server_exe_dir, "version.txt")
-    if os.path.exists(version_txt_path):
-        with open(version_txt_path, "r", encoding="utf-8") as f:
-            llamacpp_installed_version = f.read()
-
-        if llamacpp_installed_version != LLAMA_VERSION:
-            # Remove the existing install, which will trigger a new install
-            # in the next code block
-            shutil.rmtree(llama_server_exe_dir)
-
-    # Download llama.cpp server if it isn't already available
-    if not os.path.exists(llama_server_exe_dir):
-        # Download llama.cpp server zip
-        llama_zip_url, filename = get_binary_url_and_filename(LLAMA_VERSION)
-        llama_zip_path = os.path.join(os.path.dirname(sys.executable), filename)
-        logging.info(f"Downloading llama.cpp server from {llama_zip_url}")
-
-        with requests.get(llama_zip_url, stream=True) as r:
-            r.raise_for_status()
-            with open(llama_zip_path, "wb") as f:
-                for chunk in r.iter_content(chunk_size=8192):
-                    f.write(chunk)
-
-        # Extract zip
-        logging.info(f"Extracting {llama_zip_path} to {llama_server_exe_dir}")
-        with zipfile.ZipFile(llama_zip_path, "r") as zip_ref:
-            zip_ref.extractall(llama_server_exe_dir)
-
-        # Make executable on Linux - need to update paths after extraction
-        if platform.system().lower() == "linux":
-            # Re-get the paths since extraction might have changed the directory structure
-            _, updated_exe_path = get_llama_server_paths()
-            if os.path.exists(updated_exe_path):
-                os.chmod(updated_exe_path, 0o755)
-                logging.info(f"Set executable permissions for {updated_exe_path}")
-            else:
-                logging.warning(
-                    f"Could not find llama-server executable at {updated_exe_path}"
-                )
-
-        # Save version.txt
-        with open(version_txt_path, "w", encoding="utf-8") as vf:
-            vf.write(LLAMA_VERSION)
-
-        # Delete zip file
-        os.remove(llama_zip_path)
-        logging.info("Cleaned up zip file")
+    llama_server_exe_path = get_llama_server_exe_path()

     # Download the gguf to the hugging face cache
-
-    snapshot_files = model_manager.download_gguf(model_config)
+    snapshot_files = download_gguf(model_config.checkpoint, model_config.mmproj)
     logging.debug(f"GGUF file paths: {snapshot_files}")

     # Check if model supports embeddings
-    supported_models =
+    supported_models = ModelManager().supported_models
     model_info = supported_models.get(model_config.model_name, {})
     supports_embeddings = "embeddings" in model_info.get("labels", [])
     supports_reranking = "reranking" in model_info.get("labels", [])
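In the refactored server_load() above, binary management moves into the new lemonade.tools.llamacpp.utils module. A rough sketch of the resulting flow, using only the helper signatures visible in this diff; the wrapper function and example arguments are illustrative, not part of the release:

```python
# Sketch only: mirrors the new load flow with the helpers that
# lemonade.tools.llamacpp.utils now exposes (per this diff).
from typing import Optional

from lemonade.tools.llamacpp.utils import (
    get_llama_server_exe_path,
    install_llamacpp,
    download_gguf,
)

def prepare_llamacpp(checkpoint: str, mmproj: Optional[str] = None):
    # Install and/or update the pinned llama.cpp build if needed
    install_llamacpp()

    # Resolve the platform-specific llama-server executable
    exe_path = get_llama_server_exe_path()

    # Fetch the GGUF (and optional multimodal projector) into the HF cache
    snapshot_files = download_gguf(checkpoint, mmproj)
    return exe_path, snapshot_files
```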
@@ -523,6 +449,68 @@ def chat_completion(
     )


+def completion(completion_request: CompletionRequest, telemetry: LlamaTelemetry):
+    """
+    Handle text completions using the llamacpp server.
+
+    Args:
+        completion_request: The completion request containing prompt and parameters
+        telemetry: Telemetry object containing the server port
+
+    Returns:
+        Completion response from the llamacpp server
+    """
+    base_url = llamacpp_address(telemetry.port)
+    client = OpenAI(
+        base_url=base_url,
+        api_key="lemonade",
+    )
+
+    # Convert Pydantic model to dict and remove unset/null values
+    request_dict = completion_request.model_dump(exclude_unset=True, exclude_none=True)
+
+    # Check if streaming is requested
+    if completion_request.stream:
+
+        def event_stream():
+            try:
+                # Enable streaming
+                for chunk in client.completions.create(**request_dict):
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+                yield "data: [DONE]\n\n"
+
+                # Show telemetry after completion
+                telemetry.show_telemetry()
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                yield f'data: {{"error": "{str(e)}"}}\n\n'
+
+        return StreamingResponse(
+            event_stream(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            },
+        )
+    else:
+        # Non-streaming response
+        try:
+            # Disable streaming for non-streaming requests
+            response = client.completions.create(**request_dict)
+
+            # Show telemetry after completion
+            telemetry.show_telemetry()
+
+            return response
+
+        except Exception as e:  # pylint: disable=broad-exception-caught
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"Completion error: {str(e)}",
+            )
+
+
 def embeddings(embeddings_request: EmbeddingsRequest, telemetry: LlamaTelemetry):
     """
     Generate embeddings using the llamacpp server.
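With the new completion() handler above (and serve.py routing llamacpp-recipe models to it in the next file), the server should now accept classic text-completion requests for GGUF models. A hypothetical client-side sketch; the base URL, port, and model name are assumptions for illustration, not values taken from this diff:

```python
# Hypothetical client usage of the new text-completions path via
# Lemonade Server's OpenAI-compatible API. Base URL and model name assumed.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="lemonade")

# Non-streaming
response = client.completions.create(
    model="Qwen2.5-0.5B-Instruct-GGUF",  # any installed llamacpp (GGUF) model
    prompt="def fibonacci(n):",
    max_tokens=64,
)
print(response.choices[0].text)

# Streaming (exercises the server's event_stream() branch)
for chunk in client.completions.create(
    model="Qwen2.5-0.5B-Instruct-GGUF",
    prompt="def fibonacci(n):",
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].text, end="", flush=True)
```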
lemonade/tools/server/serve.py
CHANGED
@@ -284,7 +284,7 @@ class Server(ManagementTool):
     def _setup_server_common(
         self,
         port: int,
-        truncate_inputs:
+        truncate_inputs: Optional[int] = None,
         log_level: str = DEFAULT_LOG_LEVEL,
         tray: bool = False,
         log_file: str = None,

@@ -295,7 +295,7 @@ class Server(ManagementTool):

         Args:
             port: Port number for the server
-            truncate_inputs:
+            truncate_inputs: Truncate messages to this length
             log_level: Logging level to configure
             threaded_mode: Whether this is being set up for threaded execution
         """

@@ -372,7 +372,7 @@ class Server(ManagementTool):
         _=None,
         port: int = DEFAULT_PORT,
         log_level: str = DEFAULT_LOG_LEVEL,
-        truncate_inputs:
+        truncate_inputs: Optional[int] = None,
         tray: bool = False,
         log_file: str = None,
     ):

@@ -393,7 +393,7 @@ class Server(ManagementTool):
         port: int = DEFAULT_PORT,
         host: str = "localhost",
         log_level: str = "warning",
-        truncate_inputs:
+        truncate_inputs: Optional[int] = None,
     ):
         """
         Set up the server for running in a thread.

@@ -487,6 +487,9 @@ class Server(ManagementTool):
         # Load the model if it's different from the currently loaded one
         await self.load_llm(lc)

+        if self.llm_loaded.recipe == "llamacpp":
+            return llamacpp.completion(completion_request, self.llama_telemetry)
+
         # Check if the model supports reasoning
         reasoning_first_token = self.llm_loaded.reasoning

@@ -1096,29 +1099,20 @@ class Server(ManagementTool):
             )
             self.input_tokens = len(input_ids[0])

-            if
-
-
-
-
-
-
-
-
-
-                self.
-
-
-
-                    f"Input exceeded {self.llm_loaded.max_prompt_length} tokens. "
-                    f"Truncated {truncate_amount} tokens."
-                )
-                logging.warning(truncation_message)
-            else:
-                raise RuntimeError(
-                    f"Prompt tokens ({self.input_tokens}) cannot be greater "
-                    f"than the model's max prompt length ({self.llm_loaded.max_prompt_length})"
-                )
+            if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
+                # Truncate input ids
+                truncate_amount = self.input_tokens - self.truncate_inputs
+                input_ids = input_ids[: self.truncate_inputs]
+
+                # Update token count
+                self.input_tokens = len(input_ids)
+
+                # Show warning message
+                truncation_message = (
+                    f"Input exceeded {self.truncate_inputs} tokens. "
+                    f"Truncated {truncate_amount} tokens."
+                )
+                logging.warning(truncation_message)

             # Log the input tokens early to avoid this not showing due to potential crashes
             logging.debug(f"Input Tokens: {self.input_tokens}")

@@ -1314,7 +1308,7 @@ class Server(ManagementTool):
         self.tokenizer = None
         self.model = None

-        default_message =
+        default_message = "see stack trace and error message below"
         if message:
             detail = message
         else:
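The truncate_inputs option is now an Optional[int] that defaults to None, so truncation is disabled unless explicitly requested. A standalone sketch of the general idea behind the option; it mirrors the intent described in the docstring ("truncate messages to this length"), not the exact control flow shipped in serve.py:

```python
# Illustration only: input truncation against an optional token limit.
# A limit of None (the new default) disables truncation entirely.
import logging
from typing import List, Optional

def maybe_truncate(input_ids: List[int], limit: Optional[int]) -> List[int]:
    if limit is None or len(input_ids) <= limit:
        return input_ids
    logging.warning(
        "Input exceeded %d tokens. Truncated %d tokens.",
        limit,
        len(input_ids) - limit,
    )
    return input_ids[:limit]

print(len(maybe_truncate(list(range(100)), 64)))   # 64
print(len(maybe_truncate(list(range(100)), None))) # 100
```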
lemonade/tools/server/static/styles.css
CHANGED

@@ -1,33 +1,92 @@
+/* === CSS Variables === */
+:root {
+  /* Colors */
+  --primary-yellow: #ffe066;
+  --primary-yellow-dark: #ffd43b;
+  --accent-gold: #e6b800;
+  --accent-gold-dark: #bfa100;
+
+  --text-primary: #222;
+  --text-secondary: #555;
+  --text-muted: #666;
+
+  --bg-primary: #fffbe9;
+  --bg-secondary: #fff8dd;
+  --bg-tertiary: #fff5d1;
+
+  /* Transitions */
+  --transition-fast: 0.2s ease;
+  --transition-medium: 0.3s ease;
+}
+
 body {
   margin: 0;
   font-family: 'Segoe UI', 'Arial', sans-serif;
-  background:
-  color:
+  background: linear-gradient(135deg, var(--bg-primary) 0%, var(--bg-secondary) 50%, var(--bg-tertiary) 100%);
+  color: var(--text-primary);
   min-height: 100vh;
   display: flex;
   flex-direction: column;
-
+}
+
+body::before {
+  content: '';
+  position: fixed;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
+  background:
+    radial-gradient(circle at 20% 20%, rgba(255, 224, 102, 0.1) 0%, transparent 50%),
+    radial-gradient(circle at 80% 80%, rgba(255, 212, 59, 0.1) 0%, transparent 50%);
+  pointer-events: none;
+  z-index: -1;
 }

 .navbar {
   display: flex;
-  justify-content:
-
-  padding:
+  justify-content: space-between;
+  align-items: center;
+  padding: 1rem 3rem 0.5rem 1rem;
   font-size: 1.25rem;
   font-weight: 500;
   background: transparent;
   letter-spacing: 0.02em;
+  position: relative;
+  transition: var(--transition-medium);
 }

-.navbar
+.navbar-brand {
+  display: flex;
+  align-items: center;
+}
+
+.brand-title {
+  font-size: 1.5rem;
+  font-weight: 700;
+  color: var(--text-primary);
+  text-decoration: none;
+  letter-spacing: 0.01em;
+}
+
+.brand-title a {
+  color: inherit;
+  text-decoration: none;
+}
+
+.navbar-links {
+  display: flex;
+  gap: 2.5rem;
+}
+
+.navbar-links a {
   color: #444;
   text-decoration: none;
-  transition:
+  transition: var(--transition-fast);
 }

-.navbar a:hover {
-  color:
+.navbar-links a:hover {
+  color: var(--accent-gold);
 }

 .main {

@@ -37,26 +96,14 @@ body {
   align-items: center;
   justify-content: flex-start;
   min-height: 60vh;
-  margin-top:
-
-
-.title {
-  font-size: 3rem;
-  font-weight: 700;
-  margin-bottom: 2.5rem;
-  letter-spacing: 0.01em;
-  text-align: center;
-  color: #222;
+  margin-top: 2rem;
+  padding-top: 1rem;
 }

 .site-footer {
-
-  left: 0;
-  bottom: 0;
-  width: 100%;
-  background-color: #fffbe9;
+  background: transparent;
   padding-top: 0.5rem;
-
+  margin-top: auto;
 }

 .dad-joke {

@@ -483,6 +530,10 @@ body {
   background-color: #ca4747;
 }

+.model-label.coding {
+  background-color: #ff6b35;
+}
+
 .model-labels-container {
   display: flex;
   align-items: center;

@@ -983,6 +1034,50 @@ body {
   }
 }

+/* === Responsive Navbar === */
+@media (max-width: 800px) {
+  .navbar {
+    flex-direction: column;
+    gap: 1rem;
+    padding: 1rem 1rem 0.5rem 1rem;
+    align-items: center;
+  }
+
+  .navbar-brand {
+    margin-bottom: 0.5rem;
+  }
+
+  .brand-title {
+    font-size: 1.3rem;
+  }
+
+  .navbar-links {
+    gap: 1.5rem;
+    font-size: 1rem;
+  }
+}
+
+@media (max-width: 600px) {
+  .navbar {
+    padding: 0.5rem 0.5rem 0.25rem 0.5rem;
+  }
+
+  .brand-title {
+    font-size: 1.2rem;
+  }
+
+  .navbar-links {
+    gap: 1rem;
+    font-size: 0.9rem;
+    flex-wrap: wrap;
+    justify-content: center;
+  }
+
+  .main {
+    margin-top: 0.5rem;
+  }
+}
+
 /* Ensure form container allows tooltip overflow */
 .model-mgmt-register-form {
   position: relative;
lemonade/tools/server/static/webapp.html
CHANGED

@@ -12,14 +12,19 @@
     {{SERVER_MODELS_JS}}
 </head>
 <body>
-    <nav class="navbar">
-        <
-
-
-        <
+    <nav class="navbar" id="navbar">
+        <div class="navbar-brand">
+            <span class="brand-title"><a href="https://lemonade-server.ai">🍋 Lemonade Server</a></span>
+        </div>
+        <div class="navbar-links">
+            <a href="https://github.com/lemonade-sdk/lemonade" target="_blank">GitHub</a>
+            <a href="https://lemonade-server.ai/docs/" target="_blank">Docs</a>
+            <a href="https://lemonade-server.ai/docs/server/server_models/" target="_blank">Models</a>
+            <a href="https://lemonade-server.ai/docs/server/apps/" target="_blank">Featured Apps</a>
+            <a href="https://lemonade-server.ai/news/" target="_blank">News</a>
+        </div>
     </nav>
     <main class="main">
-        <div class="title">🍋 Lemonade Server</div>
         <div class="tab-container">
             <div class="tabs">
                 <button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>

@@ -104,6 +109,7 @@
                     </label>
                     <select id="register-recipe" name="recipe" required>
                         <option value="llamacpp">llamacpp</option>
+                        <option value="oga-npu">oga-npu</option>
                         <option value="oga-hybrid">oga-hybrid</option>
                         <option value="oga-cpu">oga-cpu</option>
                     </select>

@@ -408,6 +414,8 @@
                 labelClass = 'reasoning';
             } else if (labelLower === 'reranking') {
                 labelClass = 'reranking';
+            } else if (labelLower === 'coding') {
+                labelClass = 'coding';
             }
             labelSpan.className = `model-label ${labelClass}`;
             labelSpan.textContent = label;
lemonade/tools/server/tray.py
CHANGED
@@ -87,8 +87,15 @@ class LemonadeTray(SystemTray):
         Update the latest version information.
         """
         try:
+            # Prepare headers for GitHub API request
+            headers = {}
+            github_token = os.environ.get("GITHUB_TOKEN")
+            if github_token:
+                headers["Authorization"] = f"token {github_token}"
+
             response = requests.get(
                 "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
+                headers=headers,
                 timeout=10,  # Add timeout to prevent hanging
             )
             response.raise_for_status()
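The tray's update check now sends an authenticated request when a GITHUB_TOKEN environment variable is present, which avoids GitHub's low unauthenticated rate limit. A standalone sketch of the same pattern; the release-tag parsing at the end is an assumption about the GitHub API response shape, not something shown in this diff:

```python
# Standalone sketch of the update-check pattern above: optionally
# authenticate with GITHUB_TOKEN to raise the GitHub API rate limit.
import os
import requests

def latest_lemonade_release() -> str:
    headers = {}
    github_token = os.environ.get("GITHUB_TOKEN")
    if github_token:
        headers["Authorization"] = f"token {github_token}"

    response = requests.get(
        "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
        headers=headers,
        timeout=10,  # avoid hanging the tray thread
    )
    response.raise_for_status()
    # "tag_name" is the release tag, e.g. "v8.1.0"
    return response.json()["tag_name"]

if __name__ == "__main__":
    print(latest_lemonade_release())
```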
lemonade/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "8.0.5"
+__version__ = "8.1.0"