lemonade-sdk 7.0.1__py3-none-any.whl → 7.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -8,7 +8,6 @@ import traceback
8
8
  from typing import Optional, Union
9
9
  import json
10
10
  import subprocess
11
- from contextlib import asynccontextmanager
12
11
  from pathlib import Path
13
12
 
14
13
  from fastapi import FastAPI, HTTPException, status, Request
@@ -16,6 +15,8 @@ from fastapi.responses import StreamingResponse
16
15
  from fastapi.middleware.cors import CORSMiddleware
17
16
  from fastapi.staticfiles import StaticFiles
18
17
  import uvicorn
18
+ from uvicorn.config import Config
19
+ from uvicorn.server import Server as UvicornServer
19
20
  from transformers import TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
20
21
  from tabulate import tabulate
21
22
 
@@ -45,9 +46,7 @@ from openai.types.responses import (
45
46
 
46
47
  import lemonade.api as lemonade_api
47
48
  from lemonade_server.model_manager import ModelManager
48
- from lemonade.tools.management_tools import ManagementTool
49
- import lemonade.tools.server.llamacpp as llamacpp
50
- from lemonade.tools.server.pydantic_models import (
49
+ from lemonade_server.pydantic_models import (
51
50
  DEFAULT_MAX_NEW_TOKENS,
52
51
  LoadConfig,
53
52
  CompletionRequest,
@@ -55,9 +54,11 @@ from lemonade.tools.server.pydantic_models import (
55
54
  ResponsesRequest,
56
55
  PullConfig,
57
56
  )
57
+ from lemonade.tools.management_tools import ManagementTool
58
+ import lemonade.tools.server.llamacpp as llamacpp
58
59
  from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
59
60
  from lemonade.tools.server.instructions import get_instructions_html
60
-
61
+ from lemonade.tools.server.port_utils import lifespan
61
62
 
62
63
  DEFAULT_PORT = 8000
63
64
  DEFAULT_LOG_LEVEL = "info"
@@ -243,15 +244,22 @@ class Server(ManagementTool):
243
244
 
244
245
  return parser
245
246
 
246
- def run(
247
+ def _setup_server_common(
247
248
  self,
248
- # ManagementTool has a required cache_dir arg, but
249
- # we always use the default cache directory
250
- _=None,
251
- port: int = DEFAULT_PORT,
252
- log_level: str = DEFAULT_LOG_LEVEL,
249
+ port: int,
253
250
  truncate_inputs: bool = False,
251
+ log_level: str = DEFAULT_LOG_LEVEL,
252
+ threaded_mode: bool = False,
254
253
  ):
254
+ """
255
+ Common setup logic shared between run() and run_in_thread().
256
+
257
+ Args:
258
+ port: Port number for the server
259
+ truncate_inputs: Whether to truncate inputs if they exceed max length
260
+ log_level: Logging level to configure
261
+ threaded_mode: Whether this is being set up for threaded execution
262
+ """
255
263
  # Store truncation settings
256
264
  self.truncate_inputs = truncate_inputs
257
265
 
@@ -265,22 +273,27 @@ class Server(ManagementTool):
265
273
 
266
274
  logging.trace = trace
267
275
 
268
- # Configure logging to match uvicorn's format
269
- logging_level = getattr(logging, log_level.upper())
270
- logging.basicConfig(
271
- level=logging_level,
272
- format="%(levelprefix)s %(message)s",
273
- datefmt="%Y-%m-%d %H:%M:%S",
274
- )
276
+ # Configure logging based on mode
277
+ if threaded_mode:
278
+ # Configure logging for warning level (to reduce noise in threaded execution)
279
+ logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
280
+ else:
281
+ # Configure logging to match uvicorn's format
282
+ logging_level = getattr(logging, log_level.upper())
283
+ logging.basicConfig(
284
+ level=logging_level,
285
+ format="%(levelprefix)s %(message)s",
286
+ datefmt="%Y-%m-%d %H:%M:%S",
287
+ )
275
288
 
276
- # Add uvicorn's log formatter
277
- logging.root.handlers[0].formatter = uvicorn.logging.DefaultFormatter(
278
- fmt="%(levelprefix)s %(message)s",
279
- use_colors=True,
280
- )
289
+ # Add uvicorn's log formatter
290
+ logging.root.handlers[0].formatter = uvicorn.logging.DefaultFormatter(
291
+ fmt="%(levelprefix)s %(message)s",
292
+ use_colors=True,
293
+ )
281
294
 
282
- # Ensure the log level is properly set
283
- logging.getLogger().setLevel(logging_level)
295
+ # Ensure the log level is properly set
296
+ logging.getLogger().setLevel(logging_level)
284
297
 
285
298
  # Update debug logging state after setting log level
286
299
  self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
@@ -293,8 +306,62 @@ class Server(ManagementTool):
293
306
  # that the lifespan can access it
294
307
  self.app.port = port
295
308
 
309
def run(
    self,
    # ManagementTool requires a cache_dir argument; it is accepted and
    # ignored here because the default cache directory is always used
    _=None,
    port: int = DEFAULT_PORT,
    log_level: str = DEFAULT_LOG_LEVEL,
    truncate_inputs: bool = False,
):
    """
    Configure the server and serve the app on localhost with uvicorn.

    Args:
        _: Placeholder for ManagementTool's cache_dir argument (unused)
        port: Port number for the server
        log_level: Logging level, used for setup and passed to uvicorn
        truncate_inputs: Forwarded to the common setup as the truncation setting
    """
    # Shared setup, in non-threaded mode
    setup_options = {
        "port": port,
        "truncate_inputs": truncate_inputs,
        "log_level": log_level,
        "threaded_mode": False,
    }
    self._setup_server_common(**setup_options)

    uvicorn.run(self.app, host="localhost", port=port, log_level=log_level)
297
327
 
328
def run_in_thread(
    self,
    port: int = DEFAULT_PORT,
    host: str = "localhost",
    log_level: str = "warning",
    truncate_inputs: bool = False,
):
    """
    Prepare the server for execution on a thread chosen by the caller.

    Performs the common setup in threaded mode, then builds a uvicorn
    server for the app and returns it. The returned instance is what the
    caller uses to control the server.
    """

    class CustomServer(UvicornServer):
        """Custom Uvicorn server that can be properly shutdown from another thread"""

        def install_signal_handlers(self):
            # Deliberately a no-op: this server installs no signal handlers
            pass

    # Shared setup, in threaded mode
    self._setup_server_common(
        port=port,
        truncate_inputs=truncate_inputs,
        log_level=log_level,
        threaded_mode=True,
    )

    # Describe the server; log_config=None is passed through to uvicorn's Config
    server_config = Config(
        app=self.app,
        host=host,
        port=port,
        log_level=log_level,
        log_config=None,
    )

    # Hand the server object back to the caller
    return CustomServer(config=server_config)
364
+
298
365
  async def _show_telemetry(self):
299
366
  """
300
367
  Show telemetry data in debug mode.
@@ -1133,7 +1200,7 @@ class Server(ManagementTool):
1133
1200
  # We will populate a LoadConfig that has all of the required fields
1134
1201
  config_to_use: LoadConfig
1135
1202
 
1136
- # First, validate that the arguments are valid
1203
+ # First, ensure that the arguments are valid
1137
1204
  if config.model_name:
1138
1205
  # Get the dictionary of supported model from disk
1139
1206
  supported_models = ModelManager().supported_models
@@ -1226,7 +1293,7 @@ class Server(ManagementTool):
1226
1293
  try:
1227
1294
  if config_to_use.recipe == "llamacpp":
1228
1295
  self.llama_server_process = llamacpp.server_load(
1229
- checkpoint=config_to_use.checkpoint,
1296
+ model_config=config_to_use,
1230
1297
  model_reference=model_reference,
1231
1298
  telemetry=self.llama_telemetry,
1232
1299
  )
@@ -1241,6 +1308,8 @@ class Server(ManagementTool):
1241
1308
  "status": "success",
1242
1309
  "message": f"Loaded model: {model_reference}",
1243
1310
  }
1311
+ except HTTPException:
1312
+ raise
1244
1313
  except Exception: # pylint: disable=broad-exception-caught
1245
1314
  self.model_load_failure(model_reference)
1246
1315
 
@@ -1339,22 +1408,5 @@ class Server(ManagementTool):
1339
1408
  return response
1340
1409
 
1341
1410
 
1342
- @asynccontextmanager
1343
- async def lifespan(app: FastAPI):
1344
- # Code here will run when the application starts up
1345
-
1346
- logging.info(
1347
- "\n"
1348
- "\n"
1349
- "🍋 Lemonade Server Ready!\n"
1350
- f"🍋 Open http://localhost:{app.port} in your browser for:\n"
1351
- "🍋 💬 chat\n"
1352
- "🍋 💻 model management\n"
1353
- "🍋 📄 docs\n"
1354
- )
1355
-
1356
- yield
1357
-
1358
-
1359
1411
  # This file was originally licensed under Apache 2.0. It has been modified.
1360
1412
  # Modifications Copyright (c) 2025 AMD
@@ -0,0 +1,262 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Lemonade Server</title>
7
+ <link rel="icon" href="data:,">
8
+ <link rel="stylesheet" href="/static/styles.css">
9
+ <script>
10
+ window.SERVER_PORT = {{SERVER_PORT}};
11
+ </script>
12
+ {{SERVER_MODELS_JS}}
13
+ </head>
14
+ <body>
15
+ <nav class="navbar">
16
+ <a href="https://github.com/lemonade-sdk/lemonade">GitHub</a>
17
+ <a href="https://lemonade-server.ai/docs/">Docs</a>
18
+ <a href="https://lemonade-server.ai/docs/server/server_models/">Models</a>
19
+ <a href="https://lemonade-server.ai/docs/server/apps/">Featured Apps</a>
20
+ </nav>
21
+ <main class="main">
22
+ <div class="title">🍋 Lemonade Server</div>
23
+ <div class="tab-container">
24
+ <div class="tabs">
25
+ <button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>
26
+ <button class="tab" id="tab-models" onclick="showTab('models')">Model Management</button>
27
+ </div>
28
+ <div class="tab-content active" id="content-chat">
29
+ <div class="chat-container">
30
+ <div class="chat-history" id="chat-history"></div>
31
+ <div class="chat-input-row">
32
+ <select id="model-select"></select>
33
+ <input type="text" id="chat-input" placeholder="Type your message..." />
34
+ <button id="send-btn">Send</button>
35
+ </div>
36
+ </div>
37
+ </div>
38
+ <div class="tab-content" id="content-models">
39
+ <div class="model-mgmt-container">
40
+ <div class="model-mgmt-pane">
41
+ <h3>Installed Models</h3>
42
+ <table class="model-table" id="installed-models-table">
43
+ <colgroup><col style="width:100%"></colgroup>
44
+ <tbody id="installed-models-tbody"></tbody>
45
+ </table>
46
+ </div>
47
+ <div class="model-mgmt-pane">
48
+ <h3>Suggested Models</h3>
49
+ <table class="model-table" id="suggested-models-table">
50
+ <tbody id="suggested-models-tbody"></tbody>
51
+ </table>
52
+ </div>
53
+ </div>
54
+ </div>
55
+ </div>
56
+ </main>
57
+ <footer class="site-footer">
58
+ <div class="dad-joke">When life gives you LLMs, make an LLM aide.</div>
59
+ <div class="copyright">Copyright 2025 AMD</div>
60
+ </footer>
61
+ <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
62
+ <script>
63
+ // Tab switching logic
64
function showTab(tab) {
    // Anything other than 'chat' selects the model management tab
    const selected = tab === 'chat' ? 'chat' : 'models';
    ['chat', 'models'].forEach(function (name) {
        const isSelected = name === selected;
        // Mark the matching tab button and content pane active, clear the other pair
        document.getElementById('tab-' + name).classList.toggle('active', isSelected);
        document.getElementById('content-' + name).classList.toggle('active', isSelected);
    });
}
77
+
78
+ // Helper to get server base URL
79
function getServerBaseUrl() {
    // Use the port published on window.SERVER_PORT; fall back to 8000 when it is unset
    const serverPort = window.SERVER_PORT ? window.SERVER_PORT : 8000;
    return 'http://localhost:' + serverPort;
}
83
+
84
+ // Populate model dropdown from /api/v1/models endpoint
85
/**
 * Fill the chat model dropdown from the /api/v1/models endpoint.
 *
 * On any failure (network error, non-2xx status, malformed body) the dropdown
 * shows a single explanatory entry instead of a model list.
 */
async function loadModels() {
    const select = document.getElementById('model-select');

    // Replace the dropdown content with one informational entry.
    // textContent keeps the message inert even if it contains markup.
    function showPlaceholder(message) {
        select.innerHTML = '';
        const opt = document.createElement('option');
        opt.textContent = message;
        select.appendChild(opt);
    }

    try {
        const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
        // fetch() only rejects on network failure, so check the status explicitly
        if (!resp.ok) {
            throw new Error('HTTP ' + resp.status);
        }
        const data = await resp.json();
        if (!data.data || !Array.isArray(data.data)) {
            showPlaceholder('No models found (malformed response)');
            return;
        }
        if (data.data.length === 0) {
            showPlaceholder('No models available');
            return;
        }
        select.innerHTML = '';
        let defaultIndex = 0;
        data.data.forEach(function (model, index) {
            // Entries may be objects with id/name or plain strings
            const modelId = model.id || model.name || model;
            const opt = document.createElement('option');
            opt.value = modelId;
            opt.textContent = modelId;
            // Preselect this model when it is present
            if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
                defaultIndex = index;
            }
            select.appendChild(opt);
        });
        select.selectedIndex = defaultIndex;
    } catch (e) {
        showPlaceholder(`Error loading models: ${e.message}`);
        console.error('Error loading models:', e);
    }
}
117
+ loadModels();
118
+
119
+ // Model Management Tab Logic
120
/**
 * Rebuild both tables of the Model Management tab.
 *
 * Installed models come from /api/v1/models. Suggested models are the entries
 * of window.SERVER_MODELS flagged `suggested` that are not installed yet; each
 * gets a button that POSTs to /api/v1/pull and then refreshes the UI.
 */
async function refreshModelMgmtUI() {
    // Get installed models from /api/v1/models
    let installed = [];
    try {
        const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
        const data = await resp.json();
        if (data.data && Array.isArray(data.data)) {
            installed = data.data.map(m => m.id || m.name || m);
        }
    } catch (e) {
        // Best effort: continue with an empty installed list, but leave a trace
        console.error('Error loading installed models:', e);
    }

    // All models from server_models.json (window.SERVER_MODELS)
    const allModels = window.SERVER_MODELS || {};
    // Suggested models that are not installed yet
    const suggested = Object.keys(allModels).filter(
        k => allModels[k].suggested && !installed.includes(k)
    );

    // Render installed models as a table (two columns, second is invisible)
    const installedTbody = document.getElementById('installed-models-tbody');
    installedTbody.innerHTML = '';
    installed.forEach(function (mid) {
        const tr = document.createElement('tr');
        const tdName = document.createElement('td');
        tdName.textContent = mid;
        const tdEmpty = document.createElement('td');
        tdEmpty.style.width = '0';
        tdEmpty.style.padding = '0';
        tdEmpty.style.border = 'none';
        tr.appendChild(tdName);
        tr.appendChild(tdEmpty);
        installedTbody.appendChild(tr);
    });

    // Render suggested models as a table with an install button per row
    const suggestedTbody = document.getElementById('suggested-models-tbody');
    suggestedTbody.innerHTML = '';
    suggested.forEach(mid => {
        const tr = document.createElement('tr');
        const tdName = document.createElement('td');
        tdName.textContent = mid;
        tdName.style.paddingRight = '1em';
        tdName.style.verticalAlign = 'middle';
        const tdBtn = document.createElement('td');
        tdBtn.style.width = '1%';
        tdBtn.style.verticalAlign = 'middle';
        const btn = document.createElement('button');
        btn.textContent = '+';
        btn.title = 'Install model';
        btn.onclick = async function () {
            btn.disabled = true;
            btn.textContent = 'Installing...';
            btn.classList.add('installing-btn');
            try {
                const pullResp = await fetch(getServerBaseUrl() + '/api/v1/pull', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ model_name: mid })
                });
                // fetch() resolves on 4xx/5xx, so a failed pull must be detected here
                if (!pullResp.ok) {
                    throw new Error('HTTP ' + pullResp.status);
                }
                await refreshModelMgmtUI();
                await loadModels(); // update chat dropdown too
            } catch (e) {
                console.error('Error installing model:', e);
                btn.textContent = 'Error';
                // Restore the button so the user can retry the install
                btn.classList.remove('installing-btn');
                btn.disabled = false;
            }
        };
        tdBtn.appendChild(btn);
        tr.appendChild(tdName);
        tr.appendChild(tdBtn);
        suggestedTbody.appendChild(tr);
    });
}
188
+ // Initial load
189
+ refreshModelMgmtUI();
190
+ // Optionally, refresh when switching to the tab
191
+ document.getElementById('tab-models').addEventListener('click', refreshModelMgmtUI);
192
+
193
+ // Chat logic (streaming with OpenAI JS client placeholder)
194
+ const chatHistory = document.getElementById('chat-history');
195
+ const chatInput = document.getElementById('chat-input');
196
+ const sendBtn = document.getElementById('send-btn');
197
+ const modelSelect = document.getElementById('model-select');
198
+ let messages = [];
199
+
200
/**
 * Append one chat message to the history and scroll it into view.
 *
 * @param {string} role - Used as a CSS class on the row and bubble ('user' or 'llm' in this page).
 * @param {string} text - Message text; rendered as plain text, never as HTML.
 */
function appendMessage(role, text) {
    const div = document.createElement('div');
    div.className = 'chat-message ' + role;
    // Add a bubble for iMessage style
    const bubble = document.createElement('div');
    bubble.className = 'chat-bubble ' + role;
    // textContent, not innerHTML: the text comes from the user's input box,
    // so it must not be interpreted as markup
    bubble.textContent = text;
    div.appendChild(bubble);
    chatHistory.appendChild(div);
    chatHistory.scrollTop = chatHistory.scrollHeight;
}
211
+
212
/**
 * Send the text in the chat input to /api/v1/chat/completions and stream the
 * reply into a new 'llm' bubble.
 *
 * The conversation is kept in `messages`; the assistant reply is appended to
 * it once the stream completes. Errors are shown inside the reply bubble.
 */
async function sendMessage() {
    // The Enter-key handler bypasses the disabled button, so refuse to start
    // a second request while a reply is still streaming
    if (sendBtn.disabled) return;
    const text = chatInput.value.trim();
    if (!text) return;
    appendMessage('user', text);
    messages.push({ role: 'user', content: text });
    chatInput.value = '';
    sendBtn.disabled = true;

    let llmText = '';
    appendMessage('llm', '...');
    const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');

    // Handle one line of the stream: strip an optional "data:" prefix, skip
    // the [DONE] sentinel, and append the delta text of a JSON chunk.
    // JSON.parse decodes escapes (\n, \", \uXXXX) that a regex would leave raw.
    function consumeLine(line) {
        const trimmed = line.trim();
        const payload = trimmed.startsWith('data:') ? trimmed.slice(5).trim() : trimmed;
        if (!payload || payload === '[DONE]') return;
        let parsed;
        try {
            parsed = JSON.parse(payload);
        } catch (err) {
            return; // not a JSON chunk (e.g. a comment or keep-alive line)
        }
        const choice = parsed.choices && parsed.choices[0];
        const piece = choice && choice.delta && choice.delta.content;
        if (typeof piece === 'string' && piece) {
            llmText += piece;
            llmDiv.textContent = llmText;
        }
    }

    try {
        // Use the correct endpoint for chat completions
        const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                model: modelSelect.value,
                messages: messages,
                stream: true
            })
        });
        // fetch() resolves on 4xx/5xx; surface those as errors
        if (!resp.ok) throw new Error('HTTP ' + resp.status);
        if (!resp.body) throw new Error('No stream');
        const reader = resp.body.getReader();
        const decoder = new TextDecoder();
        // Text received but not yet terminated by a newline
        let pending = '';
        llmDiv.textContent = '';
        while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            // stream:true keeps multi-byte characters split across reads intact
            pending += decoder.decode(value, { stream: true });
            const lines = pending.split('\n');
            // The last piece may be an incomplete line; keep it for the next read
            pending = lines.pop();
            lines.forEach(consumeLine);
        }
        // Flush the decoder and any final unterminated line
        pending += decoder.decode();
        if (pending) consumeLine(pending);
        messages.push({ role: 'assistant', content: llmText });
    } catch (e) {
        llmDiv.textContent = '[Error: ' + e.message + ']';
    } finally {
        sendBtn.disabled = false;
    }
}
256
+ sendBtn.onclick = sendMessage;
257
+ chatInput.addEventListener('keydown', function(e) {
258
+ if (e.key === 'Enter') sendMessage();
259
+ });
260
+ </script>
261
+ </body>
262
+ </html>
@@ -0,0 +1,87 @@
1
+ import threading
2
+ import logging
3
+ from lemonade.tools.server.serve import Server
4
+
5
+
6
class ServerRunner(threading.Thread):
    """
    Thread class for running the Lemonade Server with a loaded model.

    Attributes:
        ready_event: Set once the uvicorn server object has been created
            (just before it starts serving), or when startup fails or is
            cancelled. It does not by itself prove the port is accepting
            connections.
        shutdown_event: Set as soon as shutdown() has been called.
        uvicorn_server: The server returned by Server.run_in_thread(), or
            None before run() has created it.
    """

    def __init__(
        self, model, tokenizer, checkpoint, recipe, host="localhost", port=8000
    ):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.checkpoint = checkpoint
        self.recipe = recipe
        self.host = host
        self.port = port
        self.server = None
        self.ready_event = threading.Event()
        self.shutdown_event = threading.Event()
        self.uvicorn_server = None

    def run(self):
        """Create the server, attach the model/tokenizer, and serve until shutdown."""
        # shutdown() may have been called before the thread got going;
        # in that case do not start a server at all
        if self.shutdown_event.is_set():
            self.ready_event.set()
            return

        try:
            # Create the server instance
            self.server = Server()

            # Configure the server with model/tokenizer
            self.server.model = self.model
            self.server.tokenizer = self.tokenizer
            # Lightweight stand-in object carrying the fields set below
            self.server.llm_loaded = type(
                "obj",
                (object,),
                {
                    "checkpoint": self.checkpoint,
                    "recipe": self.recipe,
                    "max_prompt_length": None,
                    "reasoning": False,
                    "model_name": "custom",
                },
            )

            # Set up the server for threaded execution
            self.uvicorn_server = self.server.run_in_thread(
                port=self.port, host=self.host, log_level="warning"
            )

            # A shutdown that arrived while the server object did not exist yet
            # could not flag it; honor that request now
            if self.shutdown_event.is_set():
                self.uvicorn_server.should_exit = True

            # Set the ready event
            self.ready_event.set()

            # Run the server until shutdown is requested
            logging.info("Starting server on http://%s:%s", self.host, self.port)
            self.uvicorn_server.run()

        except Exception as e:
            logging.error("Error starting server: %s", e)
            # Unblock anyone waiting on readiness before propagating the error
            self.ready_event.set()
            raise

    def shutdown(self):
        """
        Request server shutdown and release model resources.

        Safe to call more than once, and safe to call before the server has
        been created: the request is recorded in shutdown_event and applied
        by run().
        """
        # Always record the request, even if there is no server object yet
        self.shutdown_event.set()

        if self.uvicorn_server:
            logging.info("Shutting down server...")
            self.uvicorn_server.should_exit = True

        # Clean up resources properly to avoid memory leaks
        if self.server:
            logging.info("Cleaning up model and tokenizer resources...")

            if hasattr(self.server, "model"):
                self.server.model = None

            if hasattr(self.server, "tokenizer"):
                self.server.tokenizer = None

            if hasattr(self.server, "llm_loaded"):
                self.server.llm_loaded = None

        # Drop local references; None (rather than del) keeps the attributes
        # defined so later reads do not raise AttributeError
        self.model = None
        self.tokenizer = None
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "7.0.1"
1
+ __version__ = "7.0.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 7.0.1
3
+ Version: 7.0.3
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -1,10 +1,10 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
2
  lemonade/api.py,sha256=9apNWSMS4bYpYl7iqDA4CsHHOOMdjOIuJhNYSqj_jIA,3878
3
3
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
- lemonade/cli.py,sha256=_s-LWpaVIhOmaP0Q1qirXxNiBhdumAZ-5ub5-lRNccs,4351
4
+ lemonade/cli.py,sha256=ddN2QqsGMsVwydfcR7MSZu1z8_-bUgUP7dhw9lzbHa8,4424
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=co6LyaBArt-ahHXYZSdSER8TFZ2vVTb86CNG6X8Pxwc,22
7
+ lemonade/version.py,sha256=Ur-fY8dgd79WuOM208uDSw5amQiSzM7VmTbWPLQBZvw,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/analyze_model.py,sha256=sYWDznEUEWjx_Qekg7f1hHY4Pfe87IQ77lmsWqePgE0,803
10
10
  lemonade/common/build.py,sha256=Pk86mCr6fyBIx2zXDpq0BkdahlCmWRnwSTpShA_gwZw,7849
@@ -23,20 +23,21 @@ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOf
23
23
  lemonade/profilers/memory_tracker.py,sha256=-SSBmNlrweiX59wyNtLMWiwaMOskBzNO1_cufVwteqs,9357
24
24
  lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
25
25
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
26
+ lemonade/tools/accuracy.py,sha256=QndammQ1bmlTaF_6YDaaiJp6fpkKZDYGySdQpAgZIp8,11699
26
27
  lemonade/tools/adapter.py,sha256=4H6gfbjvqyU6qm1_-b2FE-c3a7N9OzEBeDVnIwqRDvg,3014
27
28
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
28
29
  lemonade/tools/huggingface_bench.py,sha256=POE5JYzArK2FBktazOkluLNFzlLctM39B19fK5sMx-0,10017
29
- lemonade/tools/huggingface_load.py,sha256=i4duS1DTs797savylsR5TxZRHg8Rjhd7Ogtb0fgoWNA,18716
30
+ lemonade/tools/huggingface_load.py,sha256=857GxaQcqmSv2DSsMh503aSicwQDQg5wGGlpwehHHrg,18868
30
31
  lemonade/tools/humaneval.py,sha256=RCkVR-yOL56T4EyURaU3MR3yhU4NCbeuWHDyhVWZtxw,9502
31
32
  lemonade/tools/llamacpp.py,sha256=uv-xv5KfHm0eU1I6vEKuaRC-QpilE1FffVA-zoCvHt4,8659
32
33
  lemonade/tools/llamacpp_bench.py,sha256=tZamG-1Z5pG_bD4O4yz2mUo2AWwEgOw9RSdEDllW4HY,5941
33
34
  lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
34
35
  lemonade/tools/mmlu.py,sha256=hNa7A8dhpjOtgfd5MGcagpwpw4_AZRZvVj5Duz9LJ88,11381
35
36
  lemonade/tools/perplexity.py,sha256=Z6ha7LS5DhdZWHZxhDz8mDnfESbTGc6TGo8KnPjRmiE,5606
36
- lemonade/tools/prompt.py,sha256=eFm-KsJCzaO_iDaj5JkwZGxceaw0bnhx60ZieJ095k0,7593
37
+ lemonade/tools/prompt.py,sha256=AhRdWpx5BVnuJTmCsxSCw_oKHRlTiRLmOkriXon_mLE,8629
37
38
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
38
39
  lemonade/tools/ort_genai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- lemonade/tools/ort_genai/oga.py,sha256=-6y90ivX-LuQK3-ZBSM3llXHtKfBmMxdm7mPvTKSYdU,43883
40
+ lemonade/tools/ort_genai/oga.py,sha256=dZ6kbwHBVfzTujAG0ojYDhjS8uH6kwW5xZTcu20hFIc,43886
40
41
  lemonade/tools/ort_genai/oga_bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
41
42
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
43
  lemonade/tools/quark/quark_load.py,sha256=QWzhXP8MehgD_KjnsmN5a-3D5kdI2XZtKTH4HoDoFoo,5572
@@ -45,21 +46,24 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
45
46
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
46
47
  lemonade/tools/report/table.py,sha256=a0TXo1X84RxCSu0un_XM3ANOlhLtPDuqtGwR7eomf2s,24853
47
48
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- lemonade/tools/server/instructions.py,sha256=Lvm-tRZaYgHkyt3zQkmMChkXO6rUiLoIAunudmMr_D8,13388
49
- lemonade/tools/server/llamacpp.py,sha256=PeHg1DbMGcf68txFgC1CJJN5HRHEnIJ4_4EDhvqAFUI,9255
50
- lemonade/tools/server/pydantic_models.py,sha256=z1RAs9hkAFkOfMiTPtmUiC3CD2P6OMI2N0J2ztNs0d4,2179
51
- lemonade/tools/server/serve.py,sha256=7meKOKVHaODHBYD_3dDJyaiwoC_m4z_FWniZfsZ9cCI,50655
49
+ lemonade/tools/server/instructions.py,sha256=PbQ8HItagIWbJLYf2IVPhthYVi1E878vNdS42qmTc3E,1230
50
+ lemonade/tools/server/llamacpp.py,sha256=YqUzx-TmyvWMrZfue7xURFfgTRLPGGSzNJtF9GERC_8,10184
51
+ lemonade/tools/server/port_utils.py,sha256=24Ryz5cNU0R9L1kuVSapZoyXTZHzhF4y0Yje9MVOrE0,1535
52
+ lemonade/tools/server/serve.py,sha256=O2ZcM1xogIRAqBE49tQ-gTFpEXExlwHOT3bYL1rZgmc,52483
53
+ lemonade/tools/server/thread_utils.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
52
54
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
55
+ lemonade/tools/server/static/instructions.html,sha256=tCkc55LrI4oWQM2VYuK3_m02MvG5XxIcTbCSgxyTAIU,11257
53
56
  lemonade/tools/server/static/styles.css,sha256=8U1EejQaqRLQ6QTCF5UG_dLPtLjRwT1menUHMDhaq2M,5045
54
57
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
55
58
  lemonade_install/install.py,sha256=61qUO7kWCLcdjK0_IQZ46-rKP_AWkyznh4YpDclPKyM,28036
56
- lemonade_sdk-7.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
57
- lemonade_sdk-7.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
59
+ lemonade_sdk-7.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
+ lemonade_sdk-7.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
58
61
  lemonade_server/cli.py,sha256=DR6sIt66K1sZZG3ascEw_6HUgz3UhU9KGUyzxf4nO_A,7351
59
- lemonade_server/model_manager.py,sha256=WDGDxrKjq-u2GkGWLNUsRk0d74J-RG2yCYEnH8WMnDw,4010
60
- lemonade_server/server_models.json,sha256=ZSg1R555bLVW4U7BPaYX5ZgwaJVNAP3z1C62dzMRqAM,6198
61
- lemonade_sdk-7.0.1.dist-info/METADATA,sha256=bvg9-Tzg_v8sTKjkAJtLahpDq_GmLDMDKA9PTisaNGw,5443
62
- lemonade_sdk-7.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
- lemonade_sdk-7.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
64
- lemonade_sdk-7.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
65
- lemonade_sdk-7.0.1.dist-info/RECORD,,
62
+ lemonade_server/model_manager.py,sha256=-r9JS_fPcoLCQCFKZfkInBIIgT4F1tQ_EIKqMqNYpqM,5546
63
+ lemonade_server/pydantic_models.py,sha256=pdOZW6nAYKWKllMLR7y5wdbIofIznxe5Vehac0Hgqto,2276
64
+ lemonade_server/server_models.json,sha256=3C-lJ2lsNwdy0AKT_US_lcVOoiF3xmadbiOUeOQuJXA,6927
65
+ lemonade_sdk-7.0.3.dist-info/METADATA,sha256=pSSPTu7kUyAh4W8lCVvxS-WAnjMT9Dsyw0r0WHcrxgA,5443
66
+ lemonade_sdk-7.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
67
+ lemonade_sdk-7.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
68
+ lemonade_sdk-7.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
69
+ lemonade_sdk-7.0.3.dist-info/RECORD,,