lemonade_sdk-9.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/__init__.py +5 -0
- lemonade/api.py +180 -0
- lemonade/cache.py +92 -0
- lemonade/cli.py +173 -0
- lemonade/common/__init__.py +0 -0
- lemonade/common/build.py +176 -0
- lemonade/common/cli_helpers.py +139 -0
- lemonade/common/exceptions.py +98 -0
- lemonade/common/filesystem.py +368 -0
- lemonade/common/inference_engines.py +408 -0
- lemonade/common/network.py +93 -0
- lemonade/common/printing.py +110 -0
- lemonade/common/status.py +471 -0
- lemonade/common/system_info.py +1411 -0
- lemonade/common/test_helpers.py +28 -0
- lemonade/profilers/__init__.py +1 -0
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/profilers/memory_tracker.py +259 -0
- lemonade/profilers/profiler.py +58 -0
- lemonade/sequence.py +363 -0
- lemonade/state.py +159 -0
- lemonade/tools/__init__.py +1 -0
- lemonade/tools/accuracy.py +432 -0
- lemonade/tools/adapter.py +114 -0
- lemonade/tools/bench.py +302 -0
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +305 -0
- lemonade/tools/huggingface/bench.py +187 -0
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/huggingface/utils.py +359 -0
- lemonade/tools/humaneval.py +264 -0
- lemonade/tools/llamacpp/bench.py +255 -0
- lemonade/tools/llamacpp/load.py +222 -0
- lemonade/tools/llamacpp/utils.py +1260 -0
- lemonade/tools/management_tools.py +319 -0
- lemonade/tools/mmlu.py +319 -0
- lemonade/tools/oga/__init__.py +0 -0
- lemonade/tools/oga/bench.py +120 -0
- lemonade/tools/oga/load.py +804 -0
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/oga/utils.py +462 -0
- lemonade/tools/perplexity.py +147 -0
- lemonade/tools/prompt.py +263 -0
- lemonade/tools/report/__init__.py +0 -0
- lemonade/tools/report/llm_report.py +203 -0
- lemonade/tools/report/table.py +899 -0
- lemonade/tools/server/__init__.py +0 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +320 -0
- lemonade/tools/server/serve.py +2123 -0
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/index.html +279 -0
- lemonade/tools/server/static/js/chat.js +1059 -0
- lemonade/tools/server/static/js/model-settings.js +183 -0
- lemonade/tools/server/static/js/models.js +1395 -0
- lemonade/tools/server/static/js/shared.js +556 -0
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +2654 -0
- lemonade/tools/server/static/webapp.html +321 -0
- lemonade/tools/server/tool_calls.py +153 -0
- lemonade/tools/server/tray.py +664 -0
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/port.py +77 -0
- lemonade/tools/server/utils/thread.py +85 -0
- lemonade/tools/server/utils/windows_tray.py +408 -0
- lemonade/tools/server/webapp.py +34 -0
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/tools/tool.py +374 -0
- lemonade/version.py +1 -0
- lemonade_install/__init__.py +1 -0
- lemonade_install/install.py +239 -0
- lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
- lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
- lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
- lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
- lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
- lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
- lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
- lemonade_server/cli.py +805 -0
- lemonade_server/model_manager.py +758 -0
- lemonade_server/pydantic_models.py +159 -0
- lemonade_server/server_models.json +643 -0
- lemonade_server/settings.py +39 -0
lemonade/tools/server/tray.py
@@ -0,0 +1,664 @@
import os
import sys
import threading
from concurrent.futures import ThreadPoolExecutor
import subprocess
import webbrowser
from pathlib import Path
import logging
import tempfile
import platform

import requests
from packaging.version import parse as parse_version

from lemonade_server.pydantic_models import DEFAULT_CTX_SIZE

from lemonade.version import __version__

# Import the appropriate tray implementation based on platform
if platform.system() == "Darwin":  # macOS
    from lemonade.tools.server.utils.macos_tray import (
        MacOSSystemTray as SystemTray,
        Menu,
        MenuItem,
    )
else:  # Windows/Linux
    from lemonade.tools.server.utils.windows_tray import (
        SystemTray,
        Menu,
        MenuItem,
    )


class OutputDuplicator:
    """
    Output duplicator that writes to both a file and a stream.
    """

    def __init__(self, file_path, stream):
        self.file = open(file_path, "a", encoding="utf-8")
        self.stream = stream

    def write(self, data):
        self.file.write(data)
        self.stream.write(data)
        self.file.flush()
        self.stream.flush()

    def flush(self):
        self.file.flush()
        self.stream.flush()

    def isatty(self):
        # Delegate the isatty check to the original stream
        return self.stream.isatty()


class LemonadeTray(SystemTray):
    """
    Lemonade-specific system tray implementation.
    """

    def __init__(self, log_file, port, server_factory, log_level="info"):
        # Find the icon path
        icon_path = Path(__file__).resolve().parents[0] / "static" / "favicon.ico"

        # Initialize the base class
        super().__init__("Lemonade Server", str(icon_path))

        # Lemonade-specific attributes
        self.loaded_llm = None
        self.server = None
        self.server_thread = None
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.log_file = log_file
        self.port = port
        self.ctx_size = DEFAULT_CTX_SIZE
        self.server_factory = server_factory
        self.debug_logs_enabled = log_level == "debug"

        # Get current and latest version
        self.current_version = __version__
        self.latest_version = __version__
        self.latest_version_url = None

        # We will get the models list with update_downloaded_models_background()
        # so that getting the list isn't on the critical path of startup
        self.downloaded_models = {}

        # Set up logger
        self.logger = logging.getLogger(__name__)

        # Initialize console handler (will be set up in run method)
        self.console_handler = None

        # Background thread for updating model mapping
        self.model_update_thread = None
        self.stop_model_update = threading.Event()

        # Background thread for version checking
        self.version_check_thread = None
        self.stop_version_check = threading.Event()

        # Hook function for platform-specific initialization callback
        self.on_ready = None

    def get_latest_version(self):
        """
        Update the latest version information.
        """
        try:
            # Prepare headers for GitHub API request
            headers = {}
            github_token = os.environ.get("GITHUB_TOKEN")
            if github_token:
                headers["Authorization"] = f"token {github_token}"

            response = requests.get(
                "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
                headers=headers,
                timeout=10,  # Add timeout to prevent hanging
            )
            response.raise_for_status()
            release_data = response.json()

            # Get version from tag name (typically in format "vX.Y.Z")
            self.latest_version = release_data.get("tag_name", "").lstrip("v")

            # Find the installer asset
            self.latest_version_url = None
            for asset in release_data.get("assets", []):
                if asset.get("name", "").endswith(
                    ".exe"
                ) and "Lemonade_Server_Installer" in asset.get("name", ""):
                    self.latest_version_url = asset.get("browser_download_url")
                    break

            self.logger.debug(
                f"Updated version info: latest version {self.latest_version}"
            )
        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error fetching latest version: {str(e)}")
        return self.latest_version, self.latest_version_url

    def update_version_background(self):
        """
        Background thread function to update version information periodically.
        """
        self.get_latest_version()
        while not self.stop_version_check.wait(900):  # 900 seconds = 15 minutes
            try:
                self.get_latest_version()
            except Exception as e:  # pylint: disable=broad-exception-caught
                self.logger.error(f"Error updating version in background: {str(e)}")

    def update_downloaded_models_background(self):
        """
        Background thread function to update model mapping every 1 second until a valid
        response is received, then every 10 seconds after that.
        This is used to avoid a ~0.5s delay when opening the tray menu.
        """
        poll_interval = 1
        while not self.stop_model_update.wait(poll_interval):
            try:
                response = requests.get(
                    f"http://localhost:{self.port}/api/v0/models",
                    timeout=0.1,  # Add timeout
                )
                response.raise_for_status()

                # Update the model mapping
                models_endpoint_response = response.json().get("data")

                # Convert to dict
                self.downloaded_models = {
                    model["id"]: {"checkpoint": model["checkpoint"]}
                    for model in models_endpoint_response
                }
                self.logger.debug("Model mapping updated in background")

                # Increase the polling interval after we get our first response
                poll_interval = 10
            except Exception:  # pylint: disable=broad-exception-caught
                # Poll again later
                pass

    def unload_llms(self, _, __):
        """
        Unload the currently loaded LLM.
        """
        requests.post(f"http://localhost:{self.port}/api/v0/unload")

    def load_llm(self, _, __, model_name):
        """Load an LLM model."""

        # Create config for loading
        config = {"model_name": model_name}

        # Use the executor to make the request asynchronously
        self.executor.submit(
            lambda: requests.post(
                f"http://localhost:{self.port}/api/v0/load", json=config
            )
        )

    def show_logs(self, _, __):
        """
        Show the log file in a new window.
        """
        try:
            system = platform.system().lower()
            if system == "darwin":
                # Use Terminal.app to show live logs on macOS
                try:
                    subprocess.Popen(
                        [
                            "osascript",
                            "-e",
                            f'tell application "Terminal" to do script "tail -f {self.log_file}"',
                        ]
                    )
                except (subprocess.CalledProcessError, FileNotFoundError) as e:
                    self.logger.error(f"Failed to open Terminal for logs: {e}")
                    self.show_balloon_notification(
                        "Error",
                        f"Failed to open logs in Terminal. Log file: {self.log_file}",
                    )
            elif system == "windows":
                # Use PowerShell on Windows
                subprocess.Popen(
                    [
                        "powershell",
                        "Start-Process",
                        "powershell",
                        "-ArgumentList",
                        f'"-NoExit", "Get-Content -Wait {self.log_file}"',
                    ]
                )
            else:
                # Unsupported platform
                self.logger.error(f"Log viewing not supported on platform: {system}")

        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error opening logs: {str(e)}")

    def open_documentation(self, _, __):
        """
        Open the documentation in the default web browser.
        """
        webbrowser.open("https://lemonade-server.ai/docs/")

    def open_llm_chat(self, _, __):
        """
        Open the LLM chat in the default web browser.
        """
        webbrowser.open(f"http://localhost:{self.port}/#llm-chat")

    def open_model_manager(self, _, __):
        """
        Open the model manager in the default web browser.
        """
        webbrowser.open(f"http://localhost:{self.port}/#model-management")

    def check_server_state(self):
        """
        Check the server state using the health endpoint
        """
        try:
            response = requests.get(
                f"http://localhost:{self.port}/api/v0/health",
                timeout=0.1,
            )
            response.raise_for_status()
            response_data = response.json()
            checkpoint = response_data.get("model_loaded")

            # Convert checkpoint to model name if possible
            if checkpoint:
                # Create a mapping from checkpoint to model name
                checkpoint_to_model = {
                    model_info["checkpoint"]: model_name
                    for model_name, model_info in self.downloaded_models.items()
                }
                # Use the model name if available, otherwise use the checkpoint
                self.loaded_llm = checkpoint_to_model.get(checkpoint, checkpoint)
            else:
                self.loaded_llm = None

            return True  # Successfully checked server state

        except Exception as e:  # pylint: disable=broad-exception-caught
            # Server might not be ready yet, so just log at debug level
            self.logger.debug(f"Error checking server state: {str(e)}")
            return False  # Failed to check server state

    def change_port(self, _, __, new_port):
        """
        Change the server port and restart the server.
        """

        try:

            # Stop the current server
            if self.server_thread and self.server_thread.is_alive():
                # Set should_exit flag on the uvicorn server instance
                if (
                    hasattr(self.server, "uvicorn_server")
                    and self.server.uvicorn_server
                ):
                    self.server.uvicorn_server.should_exit = True
                self.server_thread.join(timeout=2)

            # Update the port in both the tray and the server instance
            self.port = new_port

            # Clear the old server instance to ensure a fresh start
            # This prevents middleware conflicts when restarting
            self.server = None

            self.server_thread = threading.Thread(target=self.start_server, daemon=True)
            self.server_thread.start()

            self.show_balloon_notification(
                "Port Changed",
                f"Lemonade Server is now running on port {self.port}",
            )

        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error changing port: {str(e)}")
            self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")

    def change_context_size(self, _, __, new_ctx_size):
        """
        Change the server context size and restart the server.
        """
        try:
            # Stop the current server
            if self.server_thread and self.server_thread.is_alive():
                # Set should_exit flag on the uvicorn server instance
                if (
                    hasattr(self.server, "uvicorn_server")
                    and self.server.uvicorn_server
                ):
                    self.server.uvicorn_server.should_exit = True
                self.server_thread.join(timeout=2)
            # Update the context size in both the tray and the server instance
            self.ctx_size = new_ctx_size
            if self.server:
                self.server.ctx_size = new_ctx_size
            # Restart the server
            self.server_thread = threading.Thread(target=self.start_server, daemon=True)
            self.server_thread.start()
            # Show notification
            ctx_size_label = (
                f"{new_ctx_size//1024}K" if new_ctx_size >= 1024 else str(new_ctx_size)
            )
            self.show_balloon_notification(
                "Context Size Changed",
                f"Lemonade Server context size is now {ctx_size_label}",
            )
        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error changing context size: {str(e)}")
            self.show_balloon_notification(
                "Error", f"Failed to change context size: {str(e)}"
            )

    def _using_installer(self):
        """
        Check if the user is using the NSIS installer by checking for embeddable python
        """
        py_home = Path(sys.executable).parent
        pth_file = (
            py_home / f"python{sys.version_info.major}{sys.version_info.minor}._pth"
        )
        return pth_file.exists()

    def upgrade_to_latest(self, _, __):
        """
        Download and launch the Lemonade Server installer if the user is using the NSIS installer
        Otherwise, simply open the browser to the release page
        """

        # If the user installed from source, simply open their browser to the release page
        if not self._using_installer():
            webbrowser.open("https://github.com/lemonade-sdk/lemonade/releases/latest")
            return

        # Show notification that download is starting
        self.show_balloon_notification(
            "Upgrading Lemonade",
            "Downloading Lemonade Server Installer. Please wait...",
        )

        # Create temporary file for the installer
        installer_path = os.path.join(
            tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
        )
        if os.path.exists(installer_path):
            os.remove(installer_path)

        # Download the installer
        response = requests.get(self.latest_version_url, stream=True)
        response.raise_for_status()

        # Save the installer to disk and force write to disk
        with open(installer_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
            f.flush()
            os.fsync(f.fileno())

        # Launch the installer as a completely detached process
        # subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
        # subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
        # close_fds=True - Closes file descriptors to prevent inheritance
        subprocess.Popen(
            [installer_path],
            creationflags=subprocess.DETACHED_PROCESS
            | subprocess.CREATE_NEW_PROCESS_GROUP,
            close_fds=True,
            shell=True,
            cwd=tempfile.gettempdir(),
        )

        # No need to quit the application, the installer will handle it

    def toggle_debug_logs(self, _, __):
        """
        Toggle debug logs on and off.
        """
        try:
            new_level = "debug" if not self.debug_logs_enabled else "info"
            response = requests.post(
                f"http://localhost:{self.port}/api/v1/log-level",
                json={"level": new_level},
            )
            response.raise_for_status()
            self.debug_logs_enabled = not self.debug_logs_enabled
            self.show_balloon_notification(
                "Debug Logs",
                f"Debug logs {'enabled' if self.debug_logs_enabled else 'disabled'}",
            )
        except (FileNotFoundError, ValueError) as e:
            self.logger.error(f"Error toggling debug logs: {str(e)}")
            self.show_balloon_notification("Error", "Failed to toggle debug logs.")

    def create_menu(self):
        """
        Create the Lemonade-specific context menu.
        """
        # Check server health when menu is opened
        status_successfully_checked = self.check_server_state()

        items = []

        if not status_successfully_checked:
            items.append(
                MenuItem("Server Busy - See Logs for details", None, enabled=False)
            )
        elif self.loaded_llm:
            items.extend(
                [
                    MenuItem(f"Loaded: {self.loaded_llm}", None, enabled=False),
                    MenuItem("Unload LLM", self.unload_llms),
                ]
            )
        else:
            items.extend(
                [
                    MenuItem("No models loaded", None, enabled=False),
                ]
            )

        # Create menu items for all downloaded models
        model_menu_items = []
        if not self.downloaded_models:
            model_menu_items.append(
                MenuItem(
                    "No models available: Use the Model Manager to pull models",
                    None,
                    enabled=False,
                )
            )
        else:
            for model_name, _ in self.downloaded_models.items():
                # Create a function that returns the lambda to properly capture the variables
                def create_handler(mod):
                    return lambda icon, item: self.load_llm(icon, item, mod)

                model_item = MenuItem(model_name, create_handler(model_name))

                # Set checked property instead of modifying the text
                model_item.checked = model_name == self.loaded_llm
                model_menu_items.append(model_item)

        load_submenu = Menu(*model_menu_items)

        # Create port selection submenu with 6 options
        port_menu_items = []
        port_options = [
            8000,
            8020,
            8040,
            8060,
            8080,
            9000,
        ]

        for port_option in port_options:
            # Create a function that returns the lambda to properly capture the port variable
            def create_port_handler(port):
                return lambda icon, item: self.change_port(icon, item, port)

            # Set checked property instead of modifying the text
            port_item = MenuItem(
                f"Port {port_option}", create_port_handler(port_option)
            )
            port_item.checked = port_option == self.port
            port_menu_items.append(port_item)

        port_submenu = Menu(*port_menu_items)

        # Create context size selection submenu with 6 options
        ctx_size_menu_items = []
        ctx_size_options = [
            ("4K", 4096),
            ("8K", 8192),
            ("16K", 16384),
            ("32K", 32768),
            ("64K", 65536),
            ("128K", 131072),
        ]

        for ctx_label, ctx_value in ctx_size_options:
            # Create a function that returns the lambda to properly capture the ctx_size variable
            def create_ctx_handler(ctx_size):
                return lambda icon, item: self.change_context_size(icon, item, ctx_size)

            ctx_item = MenuItem(
                f"Context size {ctx_label}", create_ctx_handler(ctx_value)
            )
            ctx_item.checked = ctx_value == self.ctx_size
            ctx_size_menu_items.append(ctx_item)

        ctx_size_submenu = Menu(*ctx_size_menu_items)

        # Create the Logs submenu
        debug_log_text = "Enable Debug Logs"
        debug_log_item = MenuItem(debug_log_text, self.toggle_debug_logs)
        debug_log_item.checked = self.debug_logs_enabled

        logs_submenu = Menu(
            MenuItem("Show Logs", self.show_logs),
            Menu.SEPARATOR,
            debug_log_item,
        )

        if status_successfully_checked:
            items.append(MenuItem("Load Model", None, submenu=load_submenu))
            items.append(MenuItem("Port", None, submenu=port_submenu))
            items.append(MenuItem("Context Size", None, submenu=ctx_size_submenu))
        items.append(Menu.SEPARATOR)

        # Only show upgrade option if newer version is available
        if parse_version(self.latest_version) > parse_version(self.current_version):
            items.append(
                MenuItem(
                    f"Upgrade to version {self.latest_version}", self.upgrade_to_latest
                )
            )

        items.append(MenuItem("Documentation", self.open_documentation))
        items.append(MenuItem("LLM Chat", self.open_llm_chat))
        items.append(MenuItem("Model Manager", self.open_model_manager))
        items.append(MenuItem("Logs", None, submenu=logs_submenu))
        items.append(Menu.SEPARATOR)
        items.append(MenuItem("Quit Lemonade", self.exit_app))
        return Menu(*items)

    def start_server(self):
        """
        Start the uvicorn server.
        """
        self.server = self.server_factory()

        # Ensure the server uses the current port from the tray
        # This is important when changing ports
        self.server.port = self.port

        self.server.uvicorn_server = self.server.run_in_thread(self.server.host)
        self.server.uvicorn_server.run()

    def run(self):
        """
        Run the Lemonade tray application.
        """

        # Start the background model mapping update thread
        self.model_update_thread = threading.Thread(
            target=self.update_downloaded_models_background, daemon=True
        )
        self.model_update_thread.start()

        # Start the version check thread
        self.version_check_thread = threading.Thread(
            target=self.update_version_background, daemon=True
        )
        self.version_check_thread.start()

        # Start the server in a separate thread
        self.server_thread = threading.Thread(target=self.start_server, daemon=True)
        self.server_thread.start()

        # Provide an on_ready hook that Windows base tray will call after
        # the HWND/icon are created. macOS will call it immediately after run.
        def _on_ready():
            system = platform.system().lower()
            if system == "darwin":
                message = (
                    "Lemonade Server is running! "
                    "Click the tray icon above to access options."
                )
            else:  # Windows/Linux
                message = (
                    "Lemonade Server is running! "
                    "Right-click the tray icon below to access options."
                )
            self.show_balloon_notification("Woohoo!", message)

        # Attach hook for both implementations to invoke after init
        self.on_ready = _on_ready

        # Call the parent run method which handles platform-specific initialization
        super().run()

    def exit_app(self, icon, item):
        """
        Exit the application.
        """
        # Stop the background threads
        self.stop_model_update.set()
        self.stop_version_check.set()

        if self.model_update_thread and self.model_update_thread.is_alive():
            self.model_update_thread.join(timeout=1)
        if self.version_check_thread and self.version_check_thread.is_alive():
            self.version_check_thread.join(timeout=1)

        # Platform-specific exit handling
        system = platform.system().lower()
        if system == "darwin":  # macOS
            # For macOS, quit the rumps application
            import rumps

            rumps.quit_application()
        else:
            # Call parent exit method for Windows
            super().exit_app(icon, item)

        # Stop the server using the CLI stop command to ensure a rigorous cleanup
        # This must be a subprocess to ensure the cleanup doesn't kill itself
        subprocess.Popen(
            [sys.executable, "-m", "lemonade_server.cli", "stop"], shell=True
        )
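For orientation, the sketch below shows how the classes in this file might be wired up by a launcher script. It is an illustrative assumption, not the package's actual entry point (which, per the manifest above, lives in lemonade_server/cli.py and lemonade/tools/server/serve.py): make_server() and the log path are hypothetical placeholders, and the real wiring may differ.

# Hedged usage sketch; make_server() and LOG_FILE are hypothetical placeholders.
import sys

from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator

LOG_FILE = "lemonade-server.log"  # hypothetical log path


def make_server():
    # Placeholder factory: start_server() expects the returned object to expose
    # .host, .port, .ctx_size, and .run_in_thread(host) -> uvicorn server object.
    raise NotImplementedError("supply the real server factory here")


# Mirror console output into the file that the tray's "Show Logs" action tails.
sys.stdout = OutputDuplicator(LOG_FILE, sys.stdout)
sys.stderr = OutputDuplicator(LOG_FILE, sys.stderr)

tray = LemonadeTray(log_file=LOG_FILE, port=8000, server_factory=make_server)
tray.run()  # starts the server thread, the background polling threads, and the tray loop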