lemonade-sdk 8.1.0__py3-none-any.whl → 8.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; consult the registry's advisory page for this release for more details.

@@ -1,5 +1,4 @@
1
1
  import sys
2
- import argparse
3
2
  import asyncio
4
3
  import statistics
5
4
  import time
@@ -48,6 +47,11 @@ from openai.types.responses import (
48
47
  )
49
48
 
50
49
  import lemonade.api as lemonade_api
50
+ import lemonade.tools.server.llamacpp as llamacpp
51
+ from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
52
+ from lemonade.tools.server.webapp import get_webapp_html
53
+ from lemonade.tools.server.utils.port import lifespan
54
+
51
55
  from lemonade_server.model_manager import ModelManager
52
56
  from lemonade_server.pydantic_models import (
53
57
  DEFAULT_MAX_NEW_TOKENS,
@@ -60,18 +64,18 @@ from lemonade_server.pydantic_models import (
60
64
  PullConfig,
61
65
  DeleteConfig,
62
66
  )
63
- from lemonade.tools.management_tools import ManagementTool
64
- import lemonade.tools.server.llamacpp as llamacpp
65
- from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
66
- from lemonade.tools.server.webapp import get_webapp_html
67
- from lemonade.tools.server.utils.port import lifespan
68
67
 
69
68
  # Only import tray on Windows
70
69
  if platform.system() == "Windows":
70
+ # pylint: disable=ungrouped-imports
71
71
  from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator
72
72
 
73
+
73
74
  DEFAULT_PORT = 8000
75
+ DEFAULT_HOST = "localhost"
74
76
  DEFAULT_LOG_LEVEL = "info"
77
+ DEFAULT_LLAMACPP_BACKEND = "vulkan"
78
+ DEFAULT_CTX_SIZE = 4096
75
79
 
76
80
 
77
81
  class ServerModel(Model):
@@ -126,7 +130,7 @@ class StopOnEvent:
126
130
  return self.stop_event.is_set()
127
131
 
128
132
 
129
- class Server(ManagementTool):
133
+ class Server:
130
134
  """
131
135
  Open a web server that apps can use to communicate with the LLM.
132
136
 
@@ -144,11 +148,27 @@ class Server(ManagementTool):
144
148
  - /api/v1/models: list all available models.
145
149
  """
146
150
 
147
- unique_name = "serve"
148
-
149
- def __init__(self):
151
+ def __init__(
152
+ self,
153
+ port: int = DEFAULT_PORT,
154
+ host: str = DEFAULT_HOST,
155
+ log_level: str = DEFAULT_LOG_LEVEL,
156
+ ctx_size: int = DEFAULT_CTX_SIZE,
157
+ tray: bool = False,
158
+ log_file: str = None,
159
+ llamacpp_backend: str = DEFAULT_LLAMACPP_BACKEND,
160
+ ):
150
161
  super().__init__()
151
162
 
163
+ # Save args as members
164
+ self.port = port
165
+ self.host = host
166
+ self.log_level = log_level
167
+ self.ctx_size = ctx_size
168
+ self.tray = tray
169
+ self.log_file = log_file
170
+ self.llamacpp_backend = llamacpp_backend
171
+
152
172
  # Initialize FastAPI app
153
173
  self.app = FastAPI(lifespan=lifespan)
154
174
 
@@ -186,9 +206,6 @@ class Server(ManagementTool):
186
206
  self.output_tokens = None
187
207
  self.decode_token_times = None
188
208
 
189
- # Input truncation settings
190
- self.truncate_inputs = False
191
-
192
209
  # Store debug logging state
193
210
  self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
194
211
 
@@ -241,66 +258,18 @@ class Server(ManagementTool):
241
258
  self.app.post(f"{prefix}/reranking")(self.reranking)
242
259
  self.app.post(f"{prefix}/rerank")(self.reranking)
243
260
 
244
- @staticmethod
245
- def parser(add_help: bool = True) -> argparse.ArgumentParser:
246
- parser = __class__.helpful_parser(
247
- short_description="Launch an industry-standard LLM server",
248
- add_help=add_help,
249
- )
250
-
251
- # Only add the tray option on Windows
252
- if platform.system() == "Windows":
253
- parser.add_argument(
254
- "--tray",
255
- action="store_true",
256
- help="Run the server in system tray mode",
257
- )
258
-
259
- parser.add_argument(
260
- "--port",
261
- required=False,
262
- type=int,
263
- default=DEFAULT_PORT,
264
- help=f"Port number to run the server on (default: {DEFAULT_PORT})",
265
- )
266
- parser.add_argument(
267
- "--log-level",
268
- required=False,
269
- type=str,
270
- default=DEFAULT_LOG_LEVEL,
271
- choices=["critical", "error", "warning", "info", "debug", "trace"],
272
- help=f"Logging level (default: {DEFAULT_LOG_LEVEL})",
273
- )
274
-
275
- parser.add_argument(
276
- "--log-file",
277
- required=False,
278
- type=str,
279
- help="Path to the log file",
280
- )
281
-
282
- return parser
283
-
284
261
  def _setup_server_common(
285
262
  self,
286
- port: int,
287
- truncate_inputs: Optional[int] = None,
288
- log_level: str = DEFAULT_LOG_LEVEL,
289
263
  tray: bool = False,
290
- log_file: str = None,
291
264
  threaded_mode: bool = False,
292
265
  ):
293
266
  """
294
267
  Common setup logic shared between run() and run_in_thread().
295
268
 
296
269
  Args:
297
- port: Port number for the server
298
- truncate_inputs: Truncate messages to this length
299
- log_level: Logging level to configure
270
+ tray: Whether to run the server in tray mode
300
271
  threaded_mode: Whether this is being set up for threaded execution
301
272
  """
302
- # Store truncation settings
303
- self.truncate_inputs = truncate_inputs
304
273
 
305
274
  # Define TRACE level
306
275
  logging.TRACE = 9 # Lower than DEBUG which is 10
@@ -318,18 +287,20 @@ class Server(ManagementTool):
318
287
  logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
319
288
  else:
320
289
  # Configure logging to match uvicorn's format
321
- logging_level = getattr(logging, log_level.upper())
290
+ logging_level = getattr(logging, self.log_level.upper())
322
291
 
323
292
  # Set up file handler for logging to lemonade.log
324
293
  uvicorn_formatter = uvicorn.logging.DefaultFormatter(
325
294
  fmt="%(levelprefix)s %(message)s",
326
295
  use_colors=True,
327
296
  )
328
- if not log_file:
329
- log_file = tempfile.NamedTemporaryFile(
297
+ if not self.log_file:
298
+ self.log_file = tempfile.NamedTemporaryFile(
330
299
  prefix="lemonade_", suffix=".log", delete=False
331
300
  ).name
332
- file_handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
301
+ file_handler = logging.FileHandler(
302
+ self.log_file, mode="a", encoding="utf-8"
303
+ )
333
304
  file_handler.setLevel(logging_level)
334
305
  file_handler.setFormatter(uvicorn_formatter)
335
306
 
@@ -349,12 +320,12 @@ class Server(ManagementTool):
349
320
  self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
350
321
  if tray:
351
322
  # Save original stdout/stderr
352
- sys.stdout = OutputDuplicator(log_file, sys.stdout)
353
- sys.stderr = OutputDuplicator(log_file, sys.stderr)
323
+ sys.stdout = OutputDuplicator(self.log_file, sys.stdout)
324
+ sys.stderr = OutputDuplicator(self.log_file, sys.stderr)
354
325
 
355
326
  # Open lemonade server in tray mode
356
327
  # lambda function used for deferred instantiation and thread safety
357
- LemonadeTray(log_file, port, lambda: Server()).run()
328
+ LemonadeTray(self.log_file, self.port, lambda: self).run()
358
329
  sys.exit(0)
359
330
 
360
331
  if self.debug_logging_enabled:
@@ -363,47 +334,27 @@ class Server(ManagementTool):
363
334
 
364
335
  # Let the app know what port it's running on, so
365
336
  # that the lifespan can access it
366
- self.app.port = port
337
+ self.app.port = self.port
338
+ # FastAPI already has a `host` function and we cannot use `_host` as
339
+ # PyLint will believe its private
340
+ self.app.host_ = self.host
367
341
 
368
- def run(
369
- self,
370
- # ManagementTool has a required cache_dir arg, but
371
- # we always use the default cache directory
372
- _=None,
373
- port: int = DEFAULT_PORT,
374
- log_level: str = DEFAULT_LOG_LEVEL,
375
- truncate_inputs: Optional[int] = None,
376
- tray: bool = False,
377
- log_file: str = None,
378
- ):
342
+ def run(self):
379
343
  # Common setup
380
344
  self._setup_server_common(
381
- port=port,
382
- truncate_inputs=truncate_inputs,
383
- log_level=log_level,
384
345
  threaded_mode=False,
385
- tray=tray,
386
- log_file=log_file,
346
+ tray=self.tray,
387
347
  )
388
348
 
389
- uvicorn.run(self.app, host="localhost", port=port, log_level=log_level)
349
+ uvicorn.run(self.app, host=self.host, port=self.port, log_level=self.log_level)
390
350
 
391
- def run_in_thread(
392
- self,
393
- port: int = DEFAULT_PORT,
394
- host: str = "localhost",
395
- log_level: str = "warning",
396
- truncate_inputs: Optional[int] = None,
397
- ):
351
+ def run_in_thread(self, host: str = "localhost"):
398
352
  """
399
353
  Set up the server for running in a thread.
400
354
  Returns a uvicorn server instance that can be controlled externally.
401
355
  """
402
356
  # Common setup
403
357
  self._setup_server_common(
404
- port=port,
405
- truncate_inputs=truncate_inputs,
406
- log_level=log_level,
407
358
  threaded_mode=True,
408
359
  tray=False,
409
360
  )
@@ -418,8 +369,8 @@ class Server(ManagementTool):
418
369
  config = Config(
419
370
  app=self.app,
420
371
  host=host,
421
- port=port,
422
- log_level=log_level,
372
+ port=self.port,
373
+ log_level=self.log_level,
423
374
  log_config=None,
424
375
  )
425
376
 
@@ -1099,18 +1050,19 @@ class Server(ManagementTool):
1099
1050
  )
1100
1051
  self.input_tokens = len(input_ids[0])
1101
1052
 
1102
- if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
1053
+ # For non-llamacpp recipes, truncate inputs to ctx_size if needed
1054
+ if self.llm_loaded.recipe != "llamacpp" and self.input_tokens > self.ctx_size:
1103
1055
  # Truncate input ids
1104
- truncate_amount = self.input_tokens - self.truncate_inputs
1105
- input_ids = input_ids[: self.truncate_inputs]
1056
+ truncate_amount = self.input_tokens - self.ctx_size
1057
+ input_ids = input_ids[: self.ctx_size]
1106
1058
 
1107
1059
  # Update token count
1108
1060
  self.input_tokens = len(input_ids)
1109
1061
 
1110
1062
  # Show warning message
1111
1063
  truncation_message = (
1112
- f"Input exceeded {self.truncate_inputs} tokens. "
1113
- f"Truncated {truncate_amount} tokens."
1064
+ f"Input exceeded {self.ctx_size} tokens. "
1065
+ f"Truncated {truncate_amount} tokens from the beginning."
1114
1066
  )
1115
1067
  logging.warning(truncation_message)
1116
1068
 
@@ -1429,6 +1381,8 @@ class Server(ManagementTool):
1429
1381
  self.llama_server_process = llamacpp.server_load(
1430
1382
  model_config=config_to_use,
1431
1383
  telemetry=self.llama_telemetry,
1384
+ backend=self.llamacpp_backend,
1385
+ ctx_size=self.ctx_size,
1432
1386
  )
1433
1387
 
1434
1388
  else:
@@ -416,6 +416,37 @@ body::before {
416
416
  color: #222;
417
417
  }
418
418
 
419
+ .input-with-indicator {
420
+ flex: 1;
421
+ position: relative;
422
+ display: flex;
423
+ align-items: center;
424
+ }
425
+
426
+ .input-with-indicator input[type='text'] {
427
+ flex: 1;
428
+ padding: 0.5em;
429
+ border: 1px solid #ddd;
430
+ border-radius: 4px;
431
+ background: #fff;
432
+ color: #222;
433
+ margin: 0;
434
+ }
435
+
436
+ #attachment-indicator {
437
+ position: absolute;
438
+ right: 8px;
439
+ top: 50%;
440
+ transform: translateY(-50%);
441
+ font-size: 14px;
442
+ color: #666;
443
+ pointer-events: none;
444
+ background: rgba(255, 255, 255, 0.9);
445
+ padding: 2px 4px;
446
+ border-radius: 3px;
447
+ border: 1px solid #ddd;
448
+ }
449
+
419
450
  .chat-input-row button {
420
451
  padding: 0.5em 1.2em;
421
452
  background: #e6b800;
@@ -427,6 +458,29 @@ body::before {
427
458
  font-weight: 600;
428
459
  }
429
460
 
461
+ #attachment-btn {
462
+ padding: 0.5em 0.8em;
463
+ background: #f0f0f0;
464
+ color: #222;
465
+ border: 1px solid #ddd;
466
+ }
467
+
468
+ #attachment-btn:hover {
469
+ background: #e0e0e0;
470
+ }
471
+
472
+ #clear-attachments-btn {
473
+ padding: 0.5em 0.6em;
474
+ background: #ff6b6b;
475
+ color: white;
476
+ border: 1px solid #ff5252;
477
+ margin-left: 0.2em;
478
+ }
479
+
480
+ #clear-attachments-btn:hover {
481
+ background: #ff5252;
482
+ }
483
+
430
484
  .chat-input-row button:hover {
431
485
  background: #d4a500;
432
486
  }
@@ -437,6 +491,121 @@ body::before {
437
491
  cursor: not-allowed;
438
492
  }
439
493
 
494
+ /* Image attachment preview styles */
495
+ .attachments-preview-container {
496
+ padding: 0.5em 1em 0 1em;
497
+ background: #f9f9f9;
498
+ border-top: 1px solid #e0e0e0;
499
+ display: none;
500
+ }
501
+
502
+ .attachments-preview-container.has-attachments {
503
+ display: block;
504
+ }
505
+
506
+ .attachments-preview-row {
507
+ display: flex;
508
+ gap: 8px;
509
+ align-items: center;
510
+ flex-wrap: wrap;
511
+ }
512
+
513
+ .attachment-preview {
514
+ display: flex;
515
+ align-items: center;
516
+ gap: 6px;
517
+ padding: 4px 8px;
518
+ background: #fff;
519
+ border: 1px solid #ddd;
520
+ border-radius: 4px;
521
+ box-shadow: 0 1px 2px rgba(0,0,0,0.05);
522
+ transition: all 0.2s ease;
523
+ font-size: 0.85em;
524
+ position: relative;
525
+ }
526
+
527
+ .attachment-preview:hover {
528
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
529
+ background: #fafafa;
530
+ }
531
+
532
+ .attachment-thumbnail {
533
+ width: 20px;
534
+ height: 20px;
535
+ border-radius: 2px;
536
+ object-fit: cover;
537
+ background: #f8f8f8;
538
+ border: 1px solid #e0e0e0;
539
+ flex-shrink: 0;
540
+ }
541
+
542
+ .attachment-filename {
543
+ color: #666;
544
+ max-width: 120px;
545
+ overflow: hidden;
546
+ text-overflow: ellipsis;
547
+ white-space: nowrap;
548
+ font-size: 0.9em;
549
+ line-height: 1;
550
+ }
551
+
552
+ .attachment-remove-btn {
553
+ background: none;
554
+ border: none;
555
+ color: #999;
556
+ cursor: pointer;
557
+ font-size: 14px;
558
+ padding: 0 2px;
559
+ margin-left: 4px;
560
+ transition: color 0.2s ease;
561
+ flex-shrink: 0;
562
+ }
563
+
564
+ .attachment-remove-btn:hover {
565
+ color: #ff6b6b;
566
+ }
567
+
568
+ .attachment-remove-btn:active {
569
+ transform: scale(0.9);
570
+ }
571
+
572
+ /* Fallback for non-image files or broken images */
573
+ .attachment-preview.no-preview .attachment-thumbnail {
574
+ display: flex;
575
+ align-items: center;
576
+ justify-content: center;
577
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
578
+ border: 1px dashed #dee2e6;
579
+ color: #6c757d;
580
+ font-size: 12px;
581
+ }
582
+
583
+ /* Mobile responsive adjustments */
584
+ @media (max-width: 600px) {
585
+ .attachments-preview-row {
586
+ gap: 6px;
587
+ }
588
+
589
+ .attachment-preview {
590
+ padding: 3px 6px;
591
+ gap: 4px;
592
+ }
593
+
594
+ .attachment-thumbnail {
595
+ width: 18px;
596
+ height: 18px;
597
+ }
598
+
599
+ .attachment-filename {
600
+ max-width: 100px;
601
+ font-size: 0.8em;
602
+ }
603
+
604
+ .attachment-remove-btn {
605
+ font-size: 12px;
606
+ }
607
+ }
608
+
440
609
  /* Model Management */
441
610
  .model-mgmt-container {
442
611
  display: flex;
@@ -1377,3 +1546,37 @@ body::before {
1377
1546
  from { opacity: 0; transform: translateY(-5px); }
1378
1547
  to { opacity: 1; transform: translateY(0); }
1379
1548
  }
1549
+
1550
+ /* Error banner styles */
1551
+ .error-banner {
1552
+ position: fixed;
1553
+ top: 10px;
1554
+ left: 50%;
1555
+ transform: translateX(-50%);
1556
+ background-color: #dc3545;
1557
+ color: #fff;
1558
+ padding: 0.6em 1.2em;
1559
+ border-radius: 6px;
1560
+ box-shadow: 0 2px 8px rgba(0,0,0,0.2);
1561
+ z-index: 10000;
1562
+ font-weight: 600;
1563
+ white-space: pre-line;
1564
+ display: none;
1565
+ animation: fadeIn 0.2s ease;
1566
+ align-items: center;
1567
+ }
1568
+
1569
+ .error-banner .close-btn {
1570
+ background: none;
1571
+ border: none;
1572
+ color: #fff;
1573
+ font-size: 1.2em;
1574
+ margin-left: 0.8em;
1575
+ cursor: pointer;
1576
+ padding: 0;
1577
+ line-height: 1;
1578
+ }
1579
+
1580
+ .error-banner .close-btn:hover {
1581
+ opacity: 0.8;
1582
+ }