lemonade-sdk 8.1.5__py3-none-any.whl → 8.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of lemonade-sdk has been flagged as potentially problematic.

@@ -9,7 +9,6 @@ import tempfile
  import traceback
  from typing import Optional, Union
  import json
- import subprocess
  from pathlib import Path

  from fastapi import FastAPI, HTTPException, status, Request
@@ -47,7 +46,8 @@ from openai.types.responses import (
  )

  import lemonade.api as lemonade_api
- import lemonade.tools.server.llamacpp as llamacpp
+ from lemonade.tools.server.wrapped_server import WrappedServer
+ from lemonade.tools.server.llamacpp import LlamaServer
  from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
  from lemonade.tools.server.webapp import get_webapp_html
  from lemonade.tools.server.utils.port import lifespan
@@ -232,11 +232,8 @@ class Server:
          # Add lock for load/unload operations
          self._load_lock = asyncio.Lock()

-         # Subprocess handle for llama_server.exe
-         self.llama_server_process: subprocess.Popen = None
-
-         # Telemetry instance for llama server
-         self.llama_telemetry = llamacpp.LlamaTelemetry()
+         # Subprocess handle for wrapped instance of llama_server.exe, etc.
+         self.wrapped_server: WrappedServer = None

      def setup_routes(self, api_prefixes: list[str]):
          for prefix in api_prefixes:
@@ -521,7 +518,7 @@ class Server:
          await self.load_llm(lc)

          if self.llm_loaded.recipe == "llamacpp":
-             return llamacpp.completion(completion_request, self.llama_telemetry)
+             return self.wrapped_server.completion(completion_request)

          # Check if the model supports reasoning
          reasoning_first_token = self.llm_loaded.reasoning
@@ -656,9 +653,7 @@ class Server:
          await self.load_llm(lc)

          if self.llm_loaded.recipe == "llamacpp":
-             return llamacpp.chat_completion(
-                 chat_completion_request, self.llama_telemetry
-             )
+             return self.wrapped_server.chat_completion(chat_completion_request)

          # Convert chat messages to text using the model's chat template
          text = self.apply_chat_template(
@@ -861,7 +856,7 @@ class Server:

          if self.llm_loaded.recipe == "llamacpp":
              try:
-                 return llamacpp.embeddings(embeddings_request, self.llama_telemetry)
+                 return self.wrapped_server.embeddings(embeddings_request)
              except Exception as e:  # pylint: disable=broad-exception-caught
                  # Check if model has embeddings label
                  model_info = ModelManager().supported_models.get(
@@ -884,7 +879,7 @@ class Server:

      async def reranking(self, reranking_request: RerankingRequest):
          """
-         Rerank documents based on their relevance to a query using the llamacpp server.
+         Rerank documents based on their relevance to a query.
          """
          # Initialize load config from reranking request
          lc = LoadConfig(model_name=reranking_request.model)
@@ -894,7 +889,7 @@ class Server:

          if self.llm_loaded.recipe == "llamacpp":
              try:
-                 return llamacpp.reranking(reranking_request, self.llama_telemetry)
+                 return self.wrapped_server.reranking(reranking_request)
              except Exception as e:  # pylint: disable=broad-exception-caught
                  # Check if model has reranking label
                  model_info = ModelManager().supported_models.get(
@@ -1287,7 +1282,7 @@ class Server:
          """
          # If using llama server, get telemetry from the telemetry instance
          if self.llm_loaded and self.llm_loaded.recipe == "llamacpp":
-             return self.llama_telemetry.get_telemetry_data()
+             return self.wrapped_server.telemetry.get_telemetry_data()

          # For built-in server, use the existing telemetry
          return {
@@ -1466,9 +1461,9 @@ class Server:
          ):
              if (
                  self.llm_loaded.recipe == "llamacpp"
-                 and self.llama_server_process.poll()
+                 and self.wrapped_server.process.poll()
              ):
-                 # llama-server process has gone away for some reason, so we should
+                 # wrapped server process has gone away for some reason, so we should
                  # proceed with loading to get it back
                  pass
              else:
@@ -1484,12 +1479,10 @@ class Server:
          logging.info(f"Loading llm: {config.model_name}")
          try:
              if config_to_use.recipe == "llamacpp":
-                 self.llama_server_process = llamacpp.server_load(
+                 self.wrapped_server = LlamaServer(self.llamacpp_backend)
+                 self.wrapped_server.load(
                      model_config=config_to_use,
-                     telemetry=self.llama_telemetry,
-                     backend=self.llamacpp_backend,
                      ctx_size=self.ctx_size,
-                     # Models should only upgrade when using the pull endpoint
                      do_not_upgrade=True,
                  )

@@ -1530,7 +1523,7 @@ class Server:
          await self._generate_semaphore.acquire()

          if self.llm_loaded.recipe == "llamacpp":
-             self.llama_server_process.terminate()
+             self.wrapped_server.process.terminate()

          self.llm_loaded = None
          self.tokenizer = None
@@ -0,0 +1,485 @@
+ import logging
+ import time
+ import subprocess
+ from abc import ABC, abstractmethod
+
+ import requests
+ from tabulate import tabulate
+ from fastapi import HTTPException, status
+ from fastapi.responses import StreamingResponse
+
+ from openai import OpenAI
+
+ from lemonade_server.pydantic_models import (
+     ChatCompletionRequest,
+     CompletionRequest,
+     PullConfig,
+     EmbeddingsRequest,
+     RerankingRequest,
+ )
+ from lemonade_server.model_manager import ModelManager
+ from lemonade.tools.server.utils.port import find_free_port
+
+
+ class WrappedServerTelemetry(ABC):
+     """
+     Manages telemetry data collection and display for wrapped server.
+     """
+
+     def __init__(self):
+         self.input_tokens = None
+         self.output_tokens = None
+         self.time_to_first_token = None
+         self.tokens_per_second = None
+         self.prompt_eval_time = None
+         self.eval_time = None
+
+     @abstractmethod
+     def parse_telemetry_line(self, line: str):
+         """
+         Parse telemetry data from wrapped server output lines.
+         """
+
+     def get_telemetry_data(self):
+         return {
+             "input_tokens": self.input_tokens,
+             "output_tokens": self.output_tokens,
+             "time_to_first_token": self.time_to_first_token,
+             "tokens_per_second": self.tokens_per_second,
+             "decode_token_times": None,
+         }
+
+     def show_telemetry(self):
+         # Check if debug logging is enabled
+         if not logging.getLogger().isEnabledFor(logging.DEBUG):
+             return
+
+         # Prepare telemetry data (transposed format)
+         telemetry = [
+             ["Input tokens", self.input_tokens],
+             ["Output tokens", self.output_tokens],
+             ["TTFT (s)", f"{self.time_to_first_token:.2f}"],
+             ["TPS", f"{self.tokens_per_second:.2f}"],
+         ]
+
+         table = tabulate(
+             telemetry, headers=["Metric", "Value"], tablefmt="fancy_grid"
+         ).split("\n")
+
+         # Show telemetry in debug while complying with uvicorn's log indentation
+         logging.debug("\n ".join(table))
+
+
+ class WrappedServer(ABC):
+     """
+     Abstract base class that defines the interface for Lemonade to "wrap" a server
+     like llama-server.
+     """
+
+     def __init__(self, server_name: str, telemetry: WrappedServerTelemetry):
+         self.port: int = None
+         self.process: subprocess.Popen = None
+         self.server_name: str = server_name
+         self.telemetry: WrappedServerTelemetry = telemetry
+
+     def choose_port(self):
+         """
+         Users probably don't care what port we start the wrapped server on, so let's
+         search for an empty port
+         """
+
+         self.port = find_free_port()
+
+         if self.port is None:
+             msg = f"Failed to find an empty port to start {self.server_name} on"
+             logging.error(msg)
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=msg,
+             )
+
+     def address(self) -> str:
+         """
+         Generate the base URL for the server.
+
+         Returns:
+             The base URL for the wrapped server
+         """
+         return f"http://127.0.0.1:{self.port}/v1"
+
+     def _separate_openai_params(
+         self,
+         request_dict: dict,
+         endpoint_type: str = "chat",
+     ) -> dict:
+         """
+         Separate standard OpenAI parameters from custom wrapped server parameters.
+
+         Args:
+             request_dict: Dictionary of all request parameters
+             endpoint_type: Type of endpoint ("chat" or "completion")
+
+         Returns:
+             Dictionary with parameters properly separated for OpenAI client
+         """
+         openai_client_params = {}
+         extra_params = {}
+
+         # Common OpenAI parameters for both endpoint types
+         common_params = {
+             "model",
+             "frequency_penalty",
+             "logit_bias",
+             "logprobs",
+             "max_tokens",
+             "n",
+             "presence_penalty",
+             "seed",
+             "stop",
+             "stream",
+             "temperature",
+             "top_p",
+             "user",
+         }
+
+         # Standard OpenAI parameters by endpoint type
+         if endpoint_type == "chat":
+             chat_specific_params = {
+                 "messages",
+                 "top_logprobs",
+                 "response_format",
+                 "service_tier",
+                 "stream_options",
+                 "tools",
+                 "tool_choice",
+                 "parallel_tool_calls",
+             }
+             openai_params = common_params | chat_specific_params
+         else:  # completion
+             completion_specific_params = {
+                 "prompt",
+                 "best_of",
+                 "echo",
+                 "suffix",
+             }
+             openai_params = common_params | completion_specific_params
+
+         for key, value in request_dict.items():
+             if key in openai_params:
+                 openai_client_params[key] = value
+             else:
+                 extra_params[key] = value
+
+         # If there are custom parameters, use extra_body to pass them through
+         if extra_params:
+             openai_client_params["extra_body"] = extra_params
+
+         return openai_client_params
+
+     def _log_subprocess_output(self, prefix: str):
+         """
+         Read subprocess output line by line, log to debug, and parse telemetry
+         """
+
+         if self.process.stdout:
+             try:
+                 for line in iter(self.process.stdout.readline, ""):
+                     if line:
+                         line_stripped = line.strip()
+                         logging.debug("%s: %s", prefix, line_stripped)
+
+                         self.telemetry.parse_telemetry_line(line_stripped)
+
+                     if self.process.poll() is not None:
+                         break
+             except UnicodeDecodeError as e:
+                 logging.debug(
+                     "Unicode decode error reading subprocess output: %s", str(e)
+                 )
+             except Exception as e:  # pylint: disable=broad-exception-caught
+                 logging.error("Unexpected error reading subprocess output: %s", str(e))
+
+     def _wait_for_load(self):
+         status_code = None
+         while not self.process.poll() and status_code != 200:
+             health_url = f"http://localhost:{self.port}/health"
+             try:
+                 health_response = requests.get(health_url)
+             except requests.exceptions.ConnectionError:
+                 logging.debug(
+                     f"Not able to connect to {self.server_name} yet, will retry"
+                 )
+             else:
+                 status_code = health_response.status_code
+                 logging.debug(
+                     f"Testing {self.server_name} readiness (will retry until ready), "
+                     f"result: {health_response.json()}"
+                 )
+             time.sleep(1)
+
+     @abstractmethod
+     def _launch_server_subprocess(
+         self,
+         model_config: PullConfig,
+         snapshot_files: dict,
+         ctx_size: int,
+         supports_embeddings: bool = False,
+         supports_reranking: bool = False,
+     ):
+         """
+         Launch wrapped server subprocess with appropriate configuration.
+
+         Args:
+             snapshot_files: Dictionary of model files to load
+             supports_embeddings: Whether the model supports embeddings
+             supports_reranking: Whether the model supports reranking
+         """
+
+     @abstractmethod
+     def install_server(self, backend=None):
+         """
+         Install the wrapped server
+         """
+
+     @abstractmethod
+     def download_model(
+         self, config_checkpoint, config_mmproj=None, do_not_upgrade=False
+     ) -> dict:
+         """
+         Download a model for the wrapper server
+         """
+
+     def load(
+         self,
+         model_config: PullConfig,
+         ctx_size: int,
+         do_not_upgrade: bool = False,
+     ):
+         # Install and/or update the wrapped server if needed
+         try:
+             self.install_server()
+         except NotImplementedError as e:
+             raise HTTPException(
+                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e)
+             )
+
+         # Download the model to the hugging face cache
+         snapshot_files = self.download_model(
+             model_config.checkpoint, model_config.mmproj, do_not_upgrade=do_not_upgrade
+         )
+         logging.debug(f"Model file paths: {snapshot_files}")
+
+         # Check if model supports embeddings
+         supported_models = ModelManager().supported_models
+         model_info = supported_models.get(model_config.model_name, {})
+         supports_embeddings = "embeddings" in model_info.get("labels", [])
+         supports_reranking = "reranking" in model_info.get("labels", [])
+
+         self._launch_server_subprocess(
+             model_config=model_config,
+             snapshot_files=snapshot_files,
+             ctx_size=ctx_size,
+             supports_embeddings=supports_embeddings,
+             supports_reranking=supports_reranking,
+         )
+
+         # Check the /health endpoint until server is ready
+         self._wait_for_load()
+
+         if self.process.poll():
+             raise HTTPException(
+                 status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                 detail=f"Failed to load {model_config.model_name} with {self.server_name}",
+             )
+
+     def chat_completion(self, chat_completion_request: ChatCompletionRequest):
+         client = OpenAI(
+             base_url=self.address(),
+             api_key="lemonade",
+         )
+
+         # Convert Pydantic model to dict and remove unset/null values
+         request_dict = chat_completion_request.model_dump(
+             exclude_unset=True, exclude_none=True
+         )
+
+         # Separate standard OpenAI parameters from custom llama.cpp parameters
+         openai_client_params = self._separate_openai_params(request_dict, "chat")
+
+         # Check if streaming is requested
+         if chat_completion_request.stream:
+
+             def event_stream():
+                 try:
+                     # Enable streaming
+                     # pylint: disable=missing-kwoa
+                     for chunk in client.chat.completions.create(**openai_client_params):
+                         yield f"data: {chunk.model_dump_json()}\n\n"
+                     yield "data: [DONE]\n\n"
+
+                     # Show telemetry after completion
+                     self.telemetry.show_telemetry()
+
+                 except Exception as e:  # pylint: disable=broad-exception-caught
+                     yield f'data: {{"error": "{str(e)}"}}\n\n'
+
+             return StreamingResponse(
+                 event_stream(),
+                 media_type="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "Connection": "keep-alive",
+                 },
+             )
+         else:
+             # Non-streaming response
+             try:
+                 # Disable streaming for non-streaming requests
+                 # pylint: disable=missing-kwoa
+                 response = client.chat.completions.create(**openai_client_params)
+
+                 # Show telemetry after completion
+                 self.telemetry.show_telemetry()
+
+                 return response
+
+             except Exception as e:  # pylint: disable=broad-exception-caught
+                 logging.error("Error during chat completion: %s", str(e))
+                 raise HTTPException(
+                     status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                     detail=f"Chat completion error: {str(e)}",
+                 )
+
+     def completion(self, completion_request: CompletionRequest):
+         """
+         Handle text completions using the wrapped server.
+
+         Args:
+             completion_request: The completion request containing prompt and parameters
+             telemetry: Telemetry object containing the server port
+
+         Returns:
+             Completion response from the wrapped server
+         """
+
+         client = OpenAI(
+             base_url=self.address(),
+             api_key="lemonade",
+         )
+
+         # Convert Pydantic model to dict and remove unset/null values
+         request_dict = completion_request.model_dump(
+             exclude_unset=True, exclude_none=True
+         )
+
+         # Separate standard OpenAI parameters from custom llama.cpp parameters
+         openai_client_params = self._separate_openai_params(request_dict, "completion")
+
+         # Check if streaming is requested
+         if completion_request.stream:
+
+             def event_stream():
+                 try:
+                     # Enable streaming
+                     # pylint: disable=missing-kwoa
+                     for chunk in client.completions.create(**openai_client_params):
+                         yield f"data: {chunk.model_dump_json()}\n\n"
+                     yield "data: [DONE]\n\n"
+
+                     # Show telemetry after completion
+                     self.telemetry.show_telemetry()
+
+                 except Exception as e:  # pylint: disable=broad-exception-caught
+                     yield f'data: {{"error": "{str(e)}"}}\n\n'
+
+             return StreamingResponse(
+                 event_stream(),
+                 media_type="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "Connection": "keep-alive",
+                 },
+             )
+         else:
+             # Non-streaming response
+             try:
+                 # Disable streaming for non-streaming requests
+                 # pylint: disable=missing-kwoa
+                 response = client.completions.create(**openai_client_params)
+
+                 # Show telemetry after completion
+                 self.telemetry.show_telemetry()
+
+                 return response
+
+             except Exception as e:  # pylint: disable=broad-exception-caught
+                 logging.error("Error during completion: %s", str(e))
+                 raise HTTPException(
+                     status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                     detail=f"Completion error: {str(e)}",
+                 )
+
+     def embeddings(self, embeddings_request: EmbeddingsRequest):
+         """
+         Generate embeddings using the wrapped server.
+
+         Args:
+             embeddings_request: The embeddings request containing input text/tokens
+             telemetry: Telemetry object containing the server port
+
+         Returns:
+             Embeddings response from the wrapped server
+         """
+         client = OpenAI(
+             base_url=self.address(),
+             api_key="lemonade",
+         )
+
+         # Convert Pydantic model to dict and remove unset/null values
+         request_dict = embeddings_request.model_dump(
+             exclude_unset=True, exclude_none=True
+         )
+
+         try:
+             # Call the embeddings endpoint
+             response = client.embeddings.create(**request_dict)
+             return response
+
+         except Exception as e:  # pylint: disable=broad-exception-caught
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Embeddings error: {str(e)}",
+             )
+
+     def reranking(self, reranking_request: RerankingRequest):
+         """
+         Rerank documents based on their relevance to a query using the wrapped server.
+
+         Args:
+             reranking_request: The reranking request containing query and documents
+             telemetry: Telemetry object containing the server port
+
+         Returns:
+             Reranking response from the wrapped server containing ranked documents and scores
+         """
+
+         try:
+             # Convert Pydantic model to dict and exclude unset/null values
+             request_dict = reranking_request.model_dump(
+                 exclude_unset=True, exclude_none=True
+             )
+
+             # Call the reranking endpoint directly since it's not supported by the OpenAI API
+             response = requests.post(
+                 f"{self.address()}/rerank",
+                 json=request_dict,
+             )
+             response.raise_for_status()
+             return response.json()
+
+         except Exception as e:
+             logging.error("Error during reranking: %s", str(e))
+             raise HTTPException(
+                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                 detail=f"Reranking error: {str(e)}",
+             ) from e
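
The new wrapped_server.py module above is the extension point that LlamaServer (still defined in lemonade/tools/server/llamacpp.py and imported by serve.py in this release) plugs into. For orientation only, here is a minimal sketch of what another concrete wrapper could look like. Only the WrappedServer and WrappedServerTelemetry interface comes from the diff; the EchoServer class, its binary name, and its command-line flags are hypothetical.

# Hypothetical sketch, not part of the package: a minimal WrappedServer subclass.
# Only the base-class interface is taken from the diff above; the "echo-server"
# binary, its flags, and the EchoTelemetry parsing are invented for illustration.
import subprocess

from lemonade.tools.server.wrapped_server import WrappedServer, WrappedServerTelemetry


class EchoTelemetry(WrappedServerTelemetry):
    def parse_telemetry_line(self, line: str):
        # A real wrapper would extract token counts and timings from log lines here.
        pass


class EchoServer(WrappedServer):
    def __init__(self):
        super().__init__(server_name="echo-server", telemetry=EchoTelemetry())

    def install_server(self, backend=None):
        # Assume the binary is already installed; a real wrapper would fetch it here.
        pass

    def download_model(self, config_checkpoint, config_mmproj=None, do_not_upgrade=False) -> dict:
        # Return whatever file references _launch_server_subprocess needs.
        return {"model": config_checkpoint}

    def _launch_server_subprocess(
        self,
        model_config,
        snapshot_files: dict,
        ctx_size: int,
        supports_embeddings: bool = False,
        supports_reranking: bool = False,
    ):
        # Pick a free port via the base class, then start the wrapped process.
        self.choose_port()
        self.process = subprocess.Popen(
            [
                "echo-server",
                "--model", snapshot_files["model"],
                "--port", str(self.port),
                "--ctx-size", str(ctx_size),
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )

Once load() succeeds, serve.py interacts with the wrapper only through completion(), chat_completion(), embeddings(), reranking(), telemetry, and process, which is what allows the llama.cpp-specific code in this release to shrink to the LlamaServer subclass.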
lemonade/version.py CHANGED
@@ -1 +1 @@
- __version__ = "8.1.5"
+ __version__ = "8.1.7"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lemonade-sdk
- Version: 8.1.5
+ Version: 8.1.7
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
  Author-email: lemonade@amd.com
  Requires-Python: >=3.10, <3.14
@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
  lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
- lemonade/version.py,sha256=UE2byv3TrjFSpmrlMMLljjtWoisXIAZ0MoNAF9Lc36k,22
+ lemonade/version.py,sha256=DSjpcTXEMbKTSEhH-ChqB5reU3Hdp38wfe3HkEnrtRQ,22
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -34,7 +34,7 @@ lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1
  lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
  lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
  lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
- lemonade/tools/llamacpp/utils.py,sha256=LZ0xae7tTQG9nP55DLm90PJS8UQEwGJmMIb_96pWDKE,32397
+ lemonade/tools/llamacpp/utils.py,sha256=96POJXoIBE_zLArusiOrgyCcz8D5vR3IuXkMzyg79CU,32608
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
  lemonade/tools/oga/load.py,sha256=BH5ChYbZgeP_ZN4E6HoboJD3kZcUIAPgPEVbgUZpVjQ,33778
@@ -46,11 +46,12 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
  lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- lemonade/tools/server/llamacpp.py,sha256=9OneTx78UgvTzvZbdQidiihAN4F-JyfmhwD0bnj5_IU,21090
- lemonade/tools/server/serve.py,sha256=qA0BqYEeRKXtEoS-hG20M_b1WXiiDmyvfEAk72s6XTc,60573
+ lemonade/tools/server/llamacpp.py,sha256=w-M0JXrgXVpfICnHBHhJm_yBshfwZ8zge-e1o1kH0R4,8751
+ lemonade/tools/server/serve.py,sha256=YVsO7m9E1OsQtbcjkv_1ir8oCSAdr0IXGNEvJ6FKamw,60311
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
  lemonade/tools/server/tray.py,sha256=a9z6hdqlfj91H00j6hAExRPQkzWHhE3dnqSumzEgq0U,19599
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
+ lemonade/tools/server/wrapped_server.py,sha256=-knOr2ycmrebVPYrOlCNRJH0ySZPnVlWzbKYLsfTRhE,16441
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
  lemonade/tools/server/static/styles.css,sha256=5HQQCpm8N_fzLcolPiDuhyZw_5nbO8aIl60xAn4RKmg,43385
  lemonade/tools/server/static/webapp.html,sha256=FX2MZUsljfgxxuF12KBdgvNkso_z-sHewWc0SEGGcGM,18138
@@ -63,15 +64,15 @@ lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu
  lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
  lemonade_install/install.py,sha256=onndA2a-ygyLtDfupI8JQFhU_XpK8McGZtGujFasXww,28304
- lemonade_sdk-8.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- lemonade_sdk-8.1.5.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
- lemonade_server/cli.py,sha256=-oRbLRlOX6SRH1bZURmlkDujsUBwMVprm83MtVR3DEY,18819
- lemonade_server/model_manager.py,sha256=8FsD234ODtqVF3k4GIjo6UnZKSpii11UU_zoIf-kvSU,11133
+ lemonade_sdk-8.1.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ lemonade_sdk-8.1.7.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
+ lemonade_server/cli.py,sha256=GsYMg095XMADHjWr1ytLYvbbrtSDPwQJEUn0gI-W99o,19108
+ lemonade_server/model_manager.py,sha256=xHbHo14q9EbueY6rxijFKUQtf4hgod0w9gBuPw2mVdk,18065
  lemonade_server/pydantic_models.py,sha256=49MyOlb5feLUlKsGcI75tWaflWckrItqcSVkdCY4e3A,3269
  lemonade_server/server_models.json,sha256=DAdG4ebIt5Dy5MM3kmXn1pO0XbNMph1gdpzbacBDVuc,11664
  lemonade_server/settings.py,sha256=6nsmPLFJD-UokQDmlx9ZBYMbpnn48So_PuBGWP7Fmfg,1299
- lemonade_sdk-8.1.5.dist-info/METADATA,sha256=DqkuNfUnA3CgSvDSVkz6cQJnQBw75AE9FmbAAoRdrso,16852
- lemonade_sdk-8.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- lemonade_sdk-8.1.5.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
- lemonade_sdk-8.1.5.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
- lemonade_sdk-8.1.5.dist-info/RECORD,,
+ lemonade_sdk-8.1.7.dist-info/METADATA,sha256=onbpcmKIyz0x7FmhWIzGFD0YnyOoOTUTCxtcxSno1gA,16852
+ lemonade_sdk-8.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ lemonade_sdk-8.1.7.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
+ lemonade_sdk-8.1.7.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+ lemonade_sdk-8.1.7.dist-info/RECORD,,