lemonade-sdk 7.0.4__py3-none-any.whl → 8.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (56) hide show
  1. lemonade/api.py +3 -3
  2. lemonade/cli.py +11 -17
  3. lemonade/common/build.py +0 -47
  4. lemonade/common/network.py +50 -0
  5. lemonade/common/status.py +2 -21
  6. lemonade/common/system_info.py +19 -4
  7. lemonade/profilers/memory_tracker.py +3 -1
  8. lemonade/tools/accuracy.py +3 -4
  9. lemonade/tools/adapter.py +1 -2
  10. lemonade/tools/{huggingface_bench.py → huggingface/bench.py} +2 -87
  11. lemonade/tools/huggingface/load.py +235 -0
  12. lemonade/tools/{huggingface_load.py → huggingface/utils.py} +87 -255
  13. lemonade/tools/humaneval.py +9 -3
  14. lemonade/tools/{llamacpp_bench.py → llamacpp/bench.py} +1 -1
  15. lemonade/tools/{llamacpp.py → llamacpp/load.py} +18 -2
  16. lemonade/tools/mmlu.py +7 -15
  17. lemonade/tools/{ort_genai/oga.py → oga/load.py} +31 -422
  18. lemonade/tools/oga/utils.py +423 -0
  19. lemonade/tools/perplexity.py +4 -3
  20. lemonade/tools/prompt.py +2 -1
  21. lemonade/tools/quark/quark_load.py +2 -1
  22. lemonade/tools/quark/quark_quantize.py +5 -5
  23. lemonade/tools/report/table.py +3 -3
  24. lemonade/tools/server/llamacpp.py +188 -45
  25. lemonade/tools/server/serve.py +184 -146
  26. lemonade/tools/server/static/favicon.ico +0 -0
  27. lemonade/tools/server/static/styles.css +568 -0
  28. lemonade/tools/server/static/webapp.html +439 -0
  29. lemonade/tools/server/tray.py +458 -0
  30. lemonade/tools/server/{port_utils.py → utils/port.py} +22 -3
  31. lemonade/tools/server/utils/system_tray.py +395 -0
  32. lemonade/tools/server/{instructions.py → webapp.py} +4 -10
  33. lemonade/version.py +1 -1
  34. lemonade_install/install.py +46 -28
  35. lemonade_sdk-8.0.1.dist-info/METADATA +179 -0
  36. lemonade_sdk-8.0.1.dist-info/RECORD +70 -0
  37. lemonade_server/cli.py +182 -27
  38. lemonade_server/model_manager.py +192 -20
  39. lemonade_server/pydantic_models.py +9 -4
  40. lemonade_server/server_models.json +5 -3
  41. lemonade/common/analyze_model.py +0 -26
  42. lemonade/common/labels.py +0 -61
  43. lemonade/common/onnx_helpers.py +0 -176
  44. lemonade/common/plugins.py +0 -10
  45. lemonade/common/tensor_helpers.py +0 -83
  46. lemonade/tools/server/static/instructions.html +0 -262
  47. lemonade_sdk-7.0.4.dist-info/METADATA +0 -113
  48. lemonade_sdk-7.0.4.dist-info/RECORD +0 -69
  49. /lemonade/tools/{ort_genai → oga}/__init__.py +0 -0
  50. /lemonade/tools/{ort_genai/oga_bench.py → oga/bench.py} +0 -0
  51. /lemonade/tools/server/{thread_utils.py → utils/thread.py} +0 -0
  52. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/WHEEL +0 -0
  53. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/entry_points.txt +0 -0
  54. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/licenses/LICENSE +0 -0
  55. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/licenses/NOTICE.md +0 -0
  56. {lemonade_sdk-7.0.4.dist-info → lemonade_sdk-8.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,458 @@
1
+ import os
2
+ import sys
3
+ import threading
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ import subprocess
6
+ import webbrowser
7
+ from pathlib import Path
8
+ import logging
9
+ import tempfile
10
+ import requests
11
+ from packaging.version import parse as parse_version
12
+
13
+ from lemonade.version import __version__
14
+ from lemonade.tools.server.utils.system_tray import SystemTray, Menu, MenuItem
15
+
16
+
17
class OutputDuplicator:
    """
    Output duplicator that writes to both a file and a stream.

    Every write is flushed to both targets immediately. The object can be
    used as a context manager, or closed explicitly with close(), so that the
    file handle opened in __init__ is released deterministically.
    """

    def __init__(self, file_path, stream):
        # Append mode: existing content of the file is preserved.
        # The handle intentionally outlives this method; close() releases it.
        # pylint: disable=consider-using-with
        self.file = open(file_path, "a", encoding="utf-8")
        self.stream = stream

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def write(self, data):
        """
        Write data to the file and the stream, flushing both.

        Returns the number of characters written, like other text
        file-like objects do.
        """
        self.file.write(data)
        self.stream.write(data)
        self.file.flush()
        self.stream.flush()
        return len(data)

    def flush(self):
        """
        Flush both the file and the stream.
        """
        self.file.flush()
        self.stream.flush()

    def isatty(self):
        # Delegate the isatty check to the original stream
        return self.stream.isatty()

    def close(self):
        """
        Close the file. The wrapped stream is not owned by this object and is
        left open. Calling close() more than once is harmless.
        """
        if not self.file.closed:
            self.file.close()
39
+
40
+
41
class LemonadeTray(SystemTray):
    """
    Lemonade-specific system tray implementation.

    Runs the server produced by `server_factory` in a background thread and
    exposes a context menu for loading/unloading models, changing the port,
    upgrading, opening documentation, and viewing logs.
    """

    def __init__(self, log_file, port, server_factory):
        # Find the icon path
        icon_path = Path(__file__).resolve().parents[0] / "static" / "favicon.ico"

        # Initialize the base class
        super().__init__("Lemonade Server", str(icon_path))

        # Lemonade-specific attributes
        self.loaded_llm = None
        self.server = None
        self.server_thread = None
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.log_file = log_file
        self.port = port
        self.server_factory = server_factory

        # Get current and latest version
        self.current_version = __version__
        self.latest_version = __version__
        self.latest_version_url = None

        # We will get the models list with update_downloaded_models_background()
        # so that getting the list isn't on the critical path of startup
        self.downloaded_models = {}

        # Set up logger
        self.logger = logging.getLogger(__name__)

        # Initialize console handler (will be set up in run method)
        self.console_handler = None

        # Background thread for updating model mapping
        self.model_update_thread = None
        self.stop_model_update = threading.Event()

        # Background thread for version checking
        self.version_check_thread = None
        self.stop_version_check = threading.Event()

    def get_latest_version(self):
        """
        Update the latest version information.

        Returns a (latest_version, latest_version_url) tuple. When the lookup
        fails, or the release tag is missing or not a valid version, the
        previously known values are kept and returned.
        """
        try:
            response = requests.get(
                "https://api.github.com/repos/lemonade-sdk/lemonade/releases/latest",
                timeout=10,  # Add timeout to prevent hanging
            )
            response.raise_for_status()
            release_data = response.json()

            # Get version from tag name (typically in format "vX.Y.Z")
            tag_version = release_data.get("tag_name", "").lstrip("v")

            # create_menu() feeds latest_version to parse_version(), so never
            # store an empty or unparseable tag. parse_version raises here
            # and the broad handler below keeps the old values.
            if not tag_version:
                raise ValueError("Release data does not contain a tag name")
            parse_version(tag_version)

            # Find the installer asset
            installer_url = next(
                (
                    asset.get("browser_download_url")
                    for asset in release_data.get("assets", [])
                    if asset.get("name", "").endswith(".exe")
                    and "Lemonade_Server_Installer" in asset.get("name", "")
                ),
                None,
            )

            # Publish both values only after the whole response was validated
            self.latest_version = tag_version
            self.latest_version_url = installer_url

            self.logger.debug(
                f"Updated version info: latest version {self.latest_version}"
            )
        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error fetching latest version: {str(e)}")
        return self.latest_version, self.latest_version_url

    def update_version_background(self):
        """
        Background thread function to update version information periodically.
        """
        self.get_latest_version()
        while not self.stop_version_check.wait(900):  # 900 seconds = 15 minutes
            try:
                self.get_latest_version()
            except Exception as e:  # pylint: disable=broad-exception-caught
                self.logger.error(f"Error updating version in background: {str(e)}")

    def update_downloaded_models_background(self):
        """
        Background thread function to update model mapping every 1 second until a valid
        response is received, then every 10 seconds after that.
        This is used to avoid a ~0.5s delay when opening the tray menu.
        """
        poll_interval = 1
        while not self.stop_model_update.wait(poll_interval):
            try:
                response = requests.get(
                    f"http://localhost:{self.port}/api/v0/models",
                    timeout=0.1,  # Add timeout
                )
                response.raise_for_status()

                # Update the model mapping
                models_endpoint_response = response.json().get("data")

                # Convert to dict
                self.downloaded_models = {
                    model["id"]: {"checkpoint": model["checkpoint"]}
                    for model in models_endpoint_response
                }
                self.logger.debug("Model mapping updated in background")

                # Increase the polling interval after we get our first response
                poll_interval = 10
            except Exception:  # pylint: disable=broad-exception-caught
                # Deliberate best effort: poll again later
                pass

    def unload_llms(self, _, __):
        """
        Unload the currently loaded LLM.
        """
        try:
            requests.post(f"http://localhost:{self.port}/api/v0/unload")
        except Exception as e:  # pylint: disable=broad-exception-caught
            # Keep a failed request from propagating out of the menu callback
            self.logger.error(f"Error unloading LLM: {str(e)}")

    def load_llm(self, _, __, model_name):
        """Load an LLM model."""

        # Create config for loading
        config = {"model_name": model_name}

        # Use the executor to make the request asynchronously
        self.executor.submit(
            lambda: requests.post(
                f"http://localhost:{self.port}/api/v0/load", json=config
            )
        )

    def show_logs(self, _, __):
        """
        Show the log file in a new window.
        """
        try:
            # NOTE(review): the log path is interpolated unquoted; a path that
            # contains spaces may not reach Get-Content intact - confirm.
            subprocess.Popen(
                [
                    "powershell",
                    "Start-Process",
                    "powershell",
                    "-ArgumentList",
                    f'"-NoExit", "Get-Content -Wait {self.log_file}"',
                ]
            )
        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error opening logs: {str(e)}")

    def open_documentation(self, _, __):
        """
        Open the documentation in the default web browser.
        """
        webbrowser.open("https://lemonade-server.ai/docs/")

    def open_model_manager(self, _, __):
        """
        Open the model manager in the default web browser.
        """
        webbrowser.open(f"http://localhost:{self.port}/")

    def check_server_state(self):
        """
        Check the server state using the health endpoint.

        Updates self.loaded_llm and returns True when the health endpoint
        answered, False otherwise.
        """
        try:
            response = requests.get(
                f"http://localhost:{self.port}/api/v0/health",
                timeout=0.1,  # Add timeout
            )
            response.raise_for_status()
            response_data = response.json()
            checkpoint = response_data.get("model_loaded")

            # Convert checkpoint to model name if possible
            if checkpoint:
                # Create a mapping from checkpoint to model name
                checkpoint_to_model = {
                    model_info["checkpoint"]: model_name
                    for model_name, model_info in self.downloaded_models.items()
                }
                # Use the model name if available, otherwise use the checkpoint
                self.loaded_llm = checkpoint_to_model.get(checkpoint, checkpoint)
            else:
                self.loaded_llm = None

            return True  # Successfully checked server state

        except Exception as e:  # pylint: disable=broad-exception-caught
            # Server might not be ready yet, so just log at debug level
            self.logger.debug(f"Error checking server state: {str(e)}")
            return False  # Failed to check server state

    def change_port(self, _, __, new_port):
        """
        Change the server port and restart the server.
        """
        try:
            # Stop the current server
            if self.server_thread and self.server_thread.is_alive():
                # Set should_exit flag on the uvicorn server instance
                if (
                    hasattr(self.server, "uvicorn_server")
                    and self.server.uvicorn_server
                ):
                    self.server.uvicorn_server.should_exit = True
                self.server_thread.join(timeout=2)

            # Update the port
            self.port = new_port

            # Restart the server
            self.server_thread = threading.Thread(target=self.start_server, daemon=True)
            self.server_thread.start()

            # Show notification
            self.show_balloon_notification(
                "Port Changed", f"Lemonade Server is now running on port {self.port}"
            )

        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error changing port: {str(e)}")
            self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")

    def upgrade_to_latest(self, icon, item):
        """
        Download and launch the Lemonade Server installer.

        The application only quits after the installer was downloaded and
        launched; on any failure an error notification is shown and the tray
        keeps running.
        """
        installer_url = self.latest_version_url
        if not installer_url:
            # No installer asset was found for the latest release
            self.logger.error("Error upgrading: no installer URL is available")
            self.show_balloon_notification(
                "Error", "No Lemonade Server Installer is available for download."
            )
            return

        # Show notification that download is starting
        self.show_balloon_notification(
            "Upgrading Lemonade",
            "Downloading Lemonade Server Installer. Please wait...",
        )

        # Create temporary file for the installer
        installer_path = os.path.join(
            tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
        )

        try:
            # Download the installer; the context manager releases the
            # streamed connection even if the download fails midway
            with requests.get(installer_url, stream=True, timeout=30) as response:
                response.raise_for_status()

                # Save the installer to disk
                with open(installer_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

            # Launch the installer
            subprocess.Popen([installer_path], shell=True)
        except Exception as e:  # pylint: disable=broad-exception-caught
            self.logger.error(f"Error upgrading to latest version: {str(e)}")
            self.show_balloon_notification("Error", f"Failed to upgrade: {str(e)}")
            return

        # Quit the application
        self.exit_app(icon, item)

    def _upgrade_available(self):
        """
        Return True when a newer release with a downloadable installer exists.
        """
        if not self.latest_version_url:
            return False
        try:
            return parse_version(self.latest_version) > parse_version(
                self.current_version
            )
        except Exception as e:  # pylint: disable=broad-exception-caught
            # An unparseable version must never prevent the menu from opening
            self.logger.debug(f"Error comparing versions: {str(e)}")
            return False

    def create_menu(self):
        """
        Create the Lemonade-specific context menu.
        """
        # Check server health when menu is opened
        status_successfully_checked = self.check_server_state()

        items = []

        if not status_successfully_checked:
            items.append(
                MenuItem("Server Busy - See Logs for details", None, enabled=False)
            )
        elif self.loaded_llm:
            items.extend(
                [
                    MenuItem(f"Loaded: {self.loaded_llm}", None, enabled=False),
                    MenuItem("Unload LLM", self.unload_llms),
                ]
            )
        else:
            items.append(MenuItem("No models loaded", None, enabled=False))

        # Factory functions bind the current loop value to each callback;
        # a bare lambda in the loop would see only the last value
        def create_handler(mod):
            return lambda icon, item: self.load_llm(icon, item, mod)

        def create_port_handler(port):
            return lambda icon, item: self.change_port(icon, item, port)

        # Create menu items for all downloaded models
        model_menu_items = []
        for model_name in self.downloaded_models:
            model_item = MenuItem(model_name, create_handler(model_name))

            # Set checked property instead of modifying the text
            model_item.checked = model_name == self.loaded_llm
            model_menu_items.append(model_item)

        load_submenu = Menu(*model_menu_items)

        # Create port selection submenu
        port_menu_items = []
        port_options = [
            8000,
            8020,
            8040,
            8060,
            8080,
            9000,
        ]

        for port_option in port_options:
            # Set checked property instead of modifying the text
            port_item = MenuItem(
                f"Port {port_option}", create_port_handler(port_option)
            )
            port_item.checked = port_option == self.port
            port_menu_items.append(port_item)

        port_submenu = Menu(*port_menu_items)

        if status_successfully_checked:
            items.append(MenuItem("Load Model", None, submenu=load_submenu))
        items.append(MenuItem("Port", None, submenu=port_submenu))
        items.append(Menu.SEPARATOR)

        # Only show upgrade option if a newer version can actually be installed
        if self._upgrade_available():
            items.append(
                MenuItem(
                    f"Upgrade to version {self.latest_version}", self.upgrade_to_latest
                )
            )

        items.append(MenuItem("Documentation", self.open_documentation))
        items.append(MenuItem("Model Manager", self.open_model_manager))
        items.append(MenuItem("Show Logs", self.show_logs))
        items.append(Menu.SEPARATOR)
        items.append(MenuItem("Quit Lemonade", self.exit_app))
        return Menu(*items)

    def start_server(self):
        """
        Start the uvicorn server.
        """
        self.server = self.server_factory()
        self.server.uvicorn_server = self.server.run_in_thread(port=self.port)
        self.server.uvicorn_server.run()

    def run(self):
        """
        Run the Lemonade tray application.
        """

        # Register window class and create window
        self.register_window_class()
        self.create_window()

        # Set up Windows console control handler for CTRL+C
        self.console_handler = self.setup_console_control_handler(self.logger)

        # Add tray icon
        self.add_tray_icon()

        # Start the background model mapping update thread
        self.model_update_thread = threading.Thread(
            target=self.update_downloaded_models_background, daemon=True
        )
        self.model_update_thread.start()

        # Start the version check thread
        self.version_check_thread = threading.Thread(
            target=self.update_version_background, daemon=True
        )
        self.version_check_thread.start()

        # Start the server in a separate thread
        self.server_thread = threading.Thread(target=self.start_server, daemon=True)
        self.server_thread.start()

        # Show initial notification
        self.show_balloon_notification(
            "Woohoo!",
            (
                "Lemonade Server is running! "
                "Right-click the tray icon below to access options."
            ),
        )

        # Run the message loop in the main thread
        self.message_loop()

    def exit_app(self, icon, item):
        """
        Exit the application.
        """
        # Stop the background threads
        self.stop_model_update.set()
        self.stop_version_check.set()

        if self.model_update_thread and self.model_update_thread.is_alive():
            self.model_update_thread.join(timeout=1)
        if self.version_check_thread and self.version_check_thread.is_alive():
            self.version_check_thread.join(timeout=1)

        # Call parent exit method
        super().exit_app(icon, item)

        # Stop the server using the CLI stop command to ensure a rigorous cleanup
        # This must be a subprocess to ensure the cleanup doesnt kill itself
        subprocess.Popen(
            [sys.executable, "-m", "lemonade_server.cli", "stop"], shell=True
        )
@@ -1,9 +1,16 @@
1
1
  import socketserver
2
2
  import sys
3
3
  import logging
4
+ import importlib
5
+ import asyncio
4
6
  from contextlib import asynccontextmanager
5
7
  from fastapi import FastAPI
6
8
 
9
+ _lazy_imports = {
10
+ "TextIteratorStreamer": ("transformers", "TextIteratorStreamer"),
11
+ "StoppingCriteriaList": ("transformers", "StoppingCriteriaList"),
12
+ }
13
+
7
14
 
8
15
  def find_free_port():
9
16
  """
@@ -23,9 +30,7 @@ def find_free_port():
23
30
 
24
31
  @asynccontextmanager
25
32
  async def lifespan(app: FastAPI):
26
- # Code here will run when the application starts up
27
- # Check if console can handle Unicode by testing emoji encoding
28
-
33
+ # Only do minimal setup here so endpoints are available immediately
29
34
  try:
30
35
  if sys.stdout.encoding:
31
36
  "🍋".encode(sys.stdout.encoding)
@@ -54,4 +59,18 @@ async def lifespan(app: FastAPI):
54
59
  "[Lemonade] docs\n"
55
60
  )
56
61
 
62
+ # Start lazy imports in the background, and set app.initialized = True
63
+ # when the imports are available
64
async def lazy_imports_bg():
    """
    Resolve every entry of _lazy_imports and publish it as a module global,
    then flag the app as initialized.
    """
    module_globals = globals()
    for object_name, import_info in _lazy_imports.items():
        # import_info is (module name, attribute name)
        imported_module = importlib.import_module(import_info[0])
        module_globals[object_name] = getattr(imported_module, import_info[1])

    # Set only after all lazy imports have been resolved
    app.initialized = True
73
+
74
+ asyncio.create_task(lazy_imports_bg())
75
+
57
76
  yield