lemonade-sdk 8.1.0__py3-none-any.whl → 8.1.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -1,5 +1,4 @@
1
1
  import sys
2
- import argparse
3
2
  import asyncio
4
3
  import statistics
5
4
  import time
@@ -48,6 +47,11 @@ from openai.types.responses import (
48
47
  )
49
48
 
50
49
  import lemonade.api as lemonade_api
50
+ import lemonade.tools.server.llamacpp as llamacpp
51
+ from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
52
+ from lemonade.tools.server.webapp import get_webapp_html
53
+ from lemonade.tools.server.utils.port import lifespan
54
+
51
55
  from lemonade_server.model_manager import ModelManager
52
56
  from lemonade_server.pydantic_models import (
53
57
  DEFAULT_MAX_NEW_TOKENS,
@@ -60,18 +64,17 @@ from lemonade_server.pydantic_models import (
60
64
  PullConfig,
61
65
  DeleteConfig,
62
66
  )
63
- from lemonade.tools.management_tools import ManagementTool
64
- import lemonade.tools.server.llamacpp as llamacpp
65
- from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
66
- from lemonade.tools.server.webapp import get_webapp_html
67
- from lemonade.tools.server.utils.port import lifespan
68
67
 
69
68
  # Only import tray on Windows
70
69
  if platform.system() == "Windows":
70
+ # pylint: disable=ungrouped-imports
71
71
  from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator
72
72
 
73
+
73
74
  DEFAULT_PORT = 8000
74
75
  DEFAULT_LOG_LEVEL = "info"
76
+ DEFAULT_LLAMACPP_BACKEND = "vulkan"
77
+ DEFAULT_CTX_SIZE = 4096
75
78
 
76
79
 
77
80
  class ServerModel(Model):
@@ -126,7 +129,7 @@ class StopOnEvent:
126
129
  return self.stop_event.is_set()
127
130
 
128
131
 
129
- class Server(ManagementTool):
132
+ class Server:
130
133
  """
131
134
  Open a web server that apps can use to communicate with the LLM.
132
135
 
@@ -144,11 +147,25 @@ class Server(ManagementTool):
144
147
  - /api/v1/models: list all available models.
145
148
  """
146
149
 
147
- unique_name = "serve"
148
-
149
- def __init__(self):
150
+ def __init__(
151
+ self,
152
+ port: int = DEFAULT_PORT,
153
+ log_level: str = DEFAULT_LOG_LEVEL,
154
+ ctx_size: int = DEFAULT_CTX_SIZE,
155
+ tray: bool = False,
156
+ log_file: str = None,
157
+ llamacpp_backend: str = DEFAULT_LLAMACPP_BACKEND,
158
+ ):
150
159
  super().__init__()
151
160
 
161
+ # Save args as members
162
+ self.port = port
163
+ self.log_level = log_level
164
+ self.ctx_size = ctx_size
165
+ self.tray = tray
166
+ self.log_file = log_file
167
+ self.llamacpp_backend = llamacpp_backend
168
+
152
169
  # Initialize FastAPI app
153
170
  self.app = FastAPI(lifespan=lifespan)
154
171
 
@@ -186,9 +203,6 @@ class Server(ManagementTool):
186
203
  self.output_tokens = None
187
204
  self.decode_token_times = None
188
205
 
189
- # Input truncation settings
190
- self.truncate_inputs = False
191
-
192
206
  # Store debug logging state
193
207
  self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
194
208
 
@@ -241,66 +255,18 @@ class Server(ManagementTool):
241
255
  self.app.post(f"{prefix}/reranking")(self.reranking)
242
256
  self.app.post(f"{prefix}/rerank")(self.reranking)
243
257
 
244
- @staticmethod
245
- def parser(add_help: bool = True) -> argparse.ArgumentParser:
246
- parser = __class__.helpful_parser(
247
- short_description="Launch an industry-standard LLM server",
248
- add_help=add_help,
249
- )
250
-
251
- # Only add the tray option on Windows
252
- if platform.system() == "Windows":
253
- parser.add_argument(
254
- "--tray",
255
- action="store_true",
256
- help="Run the server in system tray mode",
257
- )
258
-
259
- parser.add_argument(
260
- "--port",
261
- required=False,
262
- type=int,
263
- default=DEFAULT_PORT,
264
- help=f"Port number to run the server on (default: {DEFAULT_PORT})",
265
- )
266
- parser.add_argument(
267
- "--log-level",
268
- required=False,
269
- type=str,
270
- default=DEFAULT_LOG_LEVEL,
271
- choices=["critical", "error", "warning", "info", "debug", "trace"],
272
- help=f"Logging level (default: {DEFAULT_LOG_LEVEL})",
273
- )
274
-
275
- parser.add_argument(
276
- "--log-file",
277
- required=False,
278
- type=str,
279
- help="Path to the log file",
280
- )
281
-
282
- return parser
283
-
284
258
  def _setup_server_common(
285
259
  self,
286
- port: int,
287
- truncate_inputs: Optional[int] = None,
288
- log_level: str = DEFAULT_LOG_LEVEL,
289
260
  tray: bool = False,
290
- log_file: str = None,
291
261
  threaded_mode: bool = False,
292
262
  ):
293
263
  """
294
264
  Common setup logic shared between run() and run_in_thread().
295
265
 
296
266
  Args:
297
- port: Port number for the server
298
- truncate_inputs: Truncate messages to this length
299
- log_level: Logging level to configure
267
+ tray: Whether to run the server in tray mode
300
268
  threaded_mode: Whether this is being set up for threaded execution
301
269
  """
302
- # Store truncation settings
303
- self.truncate_inputs = truncate_inputs
304
270
 
305
271
  # Define TRACE level
306
272
  logging.TRACE = 9 # Lower than DEBUG which is 10
@@ -318,18 +284,20 @@ class Server(ManagementTool):
318
284
  logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
319
285
  else:
320
286
  # Configure logging to match uvicorn's format
321
- logging_level = getattr(logging, log_level.upper())
287
+ logging_level = getattr(logging, self.log_level.upper())
322
288
 
323
289
  # Set up file handler for logging to lemonade.log
324
290
  uvicorn_formatter = uvicorn.logging.DefaultFormatter(
325
291
  fmt="%(levelprefix)s %(message)s",
326
292
  use_colors=True,
327
293
  )
328
- if not log_file:
329
- log_file = tempfile.NamedTemporaryFile(
294
+ if not self.log_file:
295
+ self.log_file = tempfile.NamedTemporaryFile(
330
296
  prefix="lemonade_", suffix=".log", delete=False
331
297
  ).name
332
- file_handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
298
+ file_handler = logging.FileHandler(
299
+ self.log_file, mode="a", encoding="utf-8"
300
+ )
333
301
  file_handler.setLevel(logging_level)
334
302
  file_handler.setFormatter(uvicorn_formatter)
335
303
 
@@ -349,12 +317,12 @@ class Server(ManagementTool):
349
317
  self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
350
318
  if tray:
351
319
  # Save original stdout/stderr
352
- sys.stdout = OutputDuplicator(log_file, sys.stdout)
353
- sys.stderr = OutputDuplicator(log_file, sys.stderr)
320
+ sys.stdout = OutputDuplicator(self.log_file, sys.stdout)
321
+ sys.stderr = OutputDuplicator(self.log_file, sys.stderr)
354
322
 
355
323
  # Open lemonade server in tray mode
356
324
  # lambda function used for deferred instantiation and thread safety
357
- LemonadeTray(log_file, port, lambda: Server()).run()
325
+ LemonadeTray(self.log_file, self.port, lambda: self).run()
358
326
  sys.exit(0)
359
327
 
360
328
  if self.debug_logging_enabled:
@@ -363,47 +331,26 @@ class Server(ManagementTool):
363
331
 
364
332
  # Let the app know what port it's running on, so
365
333
  # that the lifespan can access it
366
- self.app.port = port
334
+ self.app.port = self.port
367
335
 
368
- def run(
369
- self,
370
- # ManagementTool has a required cache_dir arg, but
371
- # we always use the default cache directory
372
- _=None,
373
- port: int = DEFAULT_PORT,
374
- log_level: str = DEFAULT_LOG_LEVEL,
375
- truncate_inputs: Optional[int] = None,
376
- tray: bool = False,
377
- log_file: str = None,
378
- ):
336
+ def run(self):
379
337
  # Common setup
380
338
  self._setup_server_common(
381
- port=port,
382
- truncate_inputs=truncate_inputs,
383
- log_level=log_level,
384
339
  threaded_mode=False,
385
- tray=tray,
386
- log_file=log_file,
340
+ tray=self.tray,
387
341
  )
388
342
 
389
- uvicorn.run(self.app, host="localhost", port=port, log_level=log_level)
343
+ uvicorn.run(
344
+ self.app, host="localhost", port=self.port, log_level=self.log_level
345
+ )
390
346
 
391
- def run_in_thread(
392
- self,
393
- port: int = DEFAULT_PORT,
394
- host: str = "localhost",
395
- log_level: str = "warning",
396
- truncate_inputs: Optional[int] = None,
397
- ):
347
+ def run_in_thread(self, host: str = "localhost"):
398
348
  """
399
349
  Set up the server for running in a thread.
400
350
  Returns a uvicorn server instance that can be controlled externally.
401
351
  """
402
352
  # Common setup
403
353
  self._setup_server_common(
404
- port=port,
405
- truncate_inputs=truncate_inputs,
406
- log_level=log_level,
407
354
  threaded_mode=True,
408
355
  tray=False,
409
356
  )
@@ -418,8 +365,8 @@ class Server(ManagementTool):
418
365
  config = Config(
419
366
  app=self.app,
420
367
  host=host,
421
- port=port,
422
- log_level=log_level,
368
+ port=self.port,
369
+ log_level=self.log_level,
423
370
  log_config=None,
424
371
  )
425
372
 
@@ -1099,18 +1046,19 @@ class Server(ManagementTool):
1099
1046
  )
1100
1047
  self.input_tokens = len(input_ids[0])
1101
1048
 
1102
- if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
1049
+ # For non-llamacpp recipes, truncate inputs to ctx_size if needed
1050
+ if self.llm_loaded.recipe != "llamacpp" and self.input_tokens > self.ctx_size:
1103
1051
  # Truncate input ids
1104
- truncate_amount = self.input_tokens - self.truncate_inputs
1105
- input_ids = input_ids[: self.truncate_inputs]
1052
+ truncate_amount = self.input_tokens - self.ctx_size
1053
+ input_ids = input_ids[: self.ctx_size]
1106
1054
 
1107
1055
  # Update token count
1108
1056
  self.input_tokens = len(input_ids)
1109
1057
 
1110
1058
  # Show warning message
1111
1059
  truncation_message = (
1112
- f"Input exceeded {self.truncate_inputs} tokens. "
1113
- f"Truncated {truncate_amount} tokens."
1060
+ f"Input exceeded {self.ctx_size} tokens. "
1061
+ f"Truncated {truncate_amount} tokens from the beginning."
1114
1062
  )
1115
1063
  logging.warning(truncation_message)
1116
1064
 
@@ -1429,6 +1377,8 @@ class Server(ManagementTool):
1429
1377
  self.llama_server_process = llamacpp.server_load(
1430
1378
  model_config=config_to_use,
1431
1379
  telemetry=self.llama_telemetry,
1380
+ backend=self.llamacpp_backend,
1381
+ ctx_size=self.ctx_size,
1432
1382
  )
1433
1383
 
1434
1384
  else:
@@ -416,6 +416,37 @@ body::before {
416
416
  color: #222;
417
417
  }
418
418
 
419
+ .input-with-indicator {
420
+ flex: 1;
421
+ position: relative;
422
+ display: flex;
423
+ align-items: center;
424
+ }
425
+
426
+ .input-with-indicator input[type='text'] {
427
+ flex: 1;
428
+ padding: 0.5em;
429
+ border: 1px solid #ddd;
430
+ border-radius: 4px;
431
+ background: #fff;
432
+ color: #222;
433
+ margin: 0;
434
+ }
435
+
436
+ #attachment-indicator {
437
+ position: absolute;
438
+ right: 8px;
439
+ top: 50%;
440
+ transform: translateY(-50%);
441
+ font-size: 14px;
442
+ color: #666;
443
+ pointer-events: none;
444
+ background: rgba(255, 255, 255, 0.9);
445
+ padding: 2px 4px;
446
+ border-radius: 3px;
447
+ border: 1px solid #ddd;
448
+ }
449
+
419
450
  .chat-input-row button {
420
451
  padding: 0.5em 1.2em;
421
452
  background: #e6b800;
@@ -427,6 +458,29 @@ body::before {
427
458
  font-weight: 600;
428
459
  }
429
460
 
461
+ #attachment-btn {
462
+ padding: 0.5em 0.8em;
463
+ background: #f0f0f0;
464
+ color: #222;
465
+ border: 1px solid #ddd;
466
+ }
467
+
468
+ #attachment-btn:hover {
469
+ background: #e0e0e0;
470
+ }
471
+
472
+ #clear-attachments-btn {
473
+ padding: 0.5em 0.6em;
474
+ background: #ff6b6b;
475
+ color: white;
476
+ border: 1px solid #ff5252;
477
+ margin-left: 0.2em;
478
+ }
479
+
480
+ #clear-attachments-btn:hover {
481
+ background: #ff5252;
482
+ }
483
+
430
484
  .chat-input-row button:hover {
431
485
  background: #d4a500;
432
486
  }
@@ -437,6 +491,121 @@ body::before {
437
491
  cursor: not-allowed;
438
492
  }
439
493
 
494
+ /* Image attachment preview styles */
495
+ .attachments-preview-container {
496
+ padding: 0.5em 1em 0 1em;
497
+ background: #f9f9f9;
498
+ border-top: 1px solid #e0e0e0;
499
+ display: none;
500
+ }
501
+
502
+ .attachments-preview-container.has-attachments {
503
+ display: block;
504
+ }
505
+
506
+ .attachments-preview-row {
507
+ display: flex;
508
+ gap: 8px;
509
+ align-items: center;
510
+ flex-wrap: wrap;
511
+ }
512
+
513
+ .attachment-preview {
514
+ display: flex;
515
+ align-items: center;
516
+ gap: 6px;
517
+ padding: 4px 8px;
518
+ background: #fff;
519
+ border: 1px solid #ddd;
520
+ border-radius: 4px;
521
+ box-shadow: 0 1px 2px rgba(0,0,0,0.05);
522
+ transition: all 0.2s ease;
523
+ font-size: 0.85em;
524
+ position: relative;
525
+ }
526
+
527
+ .attachment-preview:hover {
528
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
529
+ background: #fafafa;
530
+ }
531
+
532
+ .attachment-thumbnail {
533
+ width: 20px;
534
+ height: 20px;
535
+ border-radius: 2px;
536
+ object-fit: cover;
537
+ background: #f8f8f8;
538
+ border: 1px solid #e0e0e0;
539
+ flex-shrink: 0;
540
+ }
541
+
542
+ .attachment-filename {
543
+ color: #666;
544
+ max-width: 120px;
545
+ overflow: hidden;
546
+ text-overflow: ellipsis;
547
+ white-space: nowrap;
548
+ font-size: 0.9em;
549
+ line-height: 1;
550
+ }
551
+
552
+ .attachment-remove-btn {
553
+ background: none;
554
+ border: none;
555
+ color: #999;
556
+ cursor: pointer;
557
+ font-size: 14px;
558
+ padding: 0 2px;
559
+ margin-left: 4px;
560
+ transition: color 0.2s ease;
561
+ flex-shrink: 0;
562
+ }
563
+
564
+ .attachment-remove-btn:hover {
565
+ color: #ff6b6b;
566
+ }
567
+
568
+ .attachment-remove-btn:active {
569
+ transform: scale(0.9);
570
+ }
571
+
572
+ /* Fallback for non-image files or broken images */
573
+ .attachment-preview.no-preview .attachment-thumbnail {
574
+ display: flex;
575
+ align-items: center;
576
+ justify-content: center;
577
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
578
+ border: 1px dashed #dee2e6;
579
+ color: #6c757d;
580
+ font-size: 12px;
581
+ }
582
+
583
+ /* Mobile responsive adjustments */
584
+ @media (max-width: 600px) {
585
+ .attachments-preview-row {
586
+ gap: 6px;
587
+ }
588
+
589
+ .attachment-preview {
590
+ padding: 3px 6px;
591
+ gap: 4px;
592
+ }
593
+
594
+ .attachment-thumbnail {
595
+ width: 18px;
596
+ height: 18px;
597
+ }
598
+
599
+ .attachment-filename {
600
+ max-width: 100px;
601
+ font-size: 0.8em;
602
+ }
603
+
604
+ .attachment-remove-btn {
605
+ font-size: 12px;
606
+ }
607
+ }
608
+
440
609
  /* Model Management */
441
610
  .model-mgmt-container {
442
611
  display: flex;
@@ -1377,3 +1546,37 @@ body::before {
1377
1546
  from { opacity: 0; transform: translateY(-5px); }
1378
1547
  to { opacity: 1; transform: translateY(0); }
1379
1548
  }
1549
+
1550
+ /* Error banner styles */
1551
+ .error-banner {
1552
+ position: fixed;
1553
+ top: 10px;
1554
+ left: 50%;
1555
+ transform: translateX(-50%);
1556
+ background-color: #dc3545;
1557
+ color: #fff;
1558
+ padding: 0.6em 1.2em;
1559
+ border-radius: 6px;
1560
+ box-shadow: 0 2px 8px rgba(0,0,0,0.2);
1561
+ z-index: 10000;
1562
+ font-weight: 600;
1563
+ white-space: pre-line;
1564
+ display: none;
1565
+ animation: fadeIn 0.2s ease;
1566
+ align-items: center;
1567
+ }
1568
+
1569
+ .error-banner .close-btn {
1570
+ background: none;
1571
+ border: none;
1572
+ color: #fff;
1573
+ font-size: 1.2em;
1574
+ margin-left: 0.8em;
1575
+ cursor: pointer;
1576
+ padding: 0;
1577
+ line-height: 1;
1578
+ }
1579
+
1580
+ .error-banner .close-btn:hover {
1581
+ opacity: 0.8;
1582
+ }