lemonade-sdk 7.0.1__py3-none-any.whl → 7.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cli.py +2 -0
- lemonade/tools/accuracy.py +335 -0
- lemonade/tools/huggingface_load.py +6 -0
- lemonade/tools/ort_genai/oga.py +6 -4
- lemonade/tools/prompt.py +28 -1
- lemonade/tools/server/instructions.py +8 -265
- lemonade/tools/server/llamacpp.py +45 -19
- lemonade/tools/server/port_utils.py +57 -0
- lemonade/tools/server/serve.py +96 -44
- lemonade/tools/server/static/instructions.html +262 -0
- lemonade/tools/server/thread_utils.py +87 -0
- lemonade/version.py +1 -1
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/METADATA +1 -1
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/RECORD +22 -18
- lemonade_server/model_manager.py +45 -12
- {lemonade/tools/server → lemonade_server}/pydantic_models.py +2 -0
- lemonade_server/server_models.json +25 -4
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/WHEEL +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-7.0.1.dist-info → lemonade_sdk-7.0.3.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py
CHANGED
|
@@ -8,7 +8,6 @@ import traceback
|
|
|
8
8
|
from typing import Optional, Union
|
|
9
9
|
import json
|
|
10
10
|
import subprocess
|
|
11
|
-
from contextlib import asynccontextmanager
|
|
12
11
|
from pathlib import Path
|
|
13
12
|
|
|
14
13
|
from fastapi import FastAPI, HTTPException, status, Request
|
|
@@ -16,6 +15,8 @@ from fastapi.responses import StreamingResponse
|
|
|
16
15
|
from fastapi.middleware.cors import CORSMiddleware
|
|
17
16
|
from fastapi.staticfiles import StaticFiles
|
|
18
17
|
import uvicorn
|
|
18
|
+
from uvicorn.config import Config
|
|
19
|
+
from uvicorn.server import Server as UvicornServer
|
|
19
20
|
from transformers import TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
|
|
20
21
|
from tabulate import tabulate
|
|
21
22
|
|
|
@@ -45,9 +46,7 @@ from openai.types.responses import (
|
|
|
45
46
|
|
|
46
47
|
import lemonade.api as lemonade_api
|
|
47
48
|
from lemonade_server.model_manager import ModelManager
|
|
48
|
-
from lemonade.tools.management_tools import ManagementTool
|
|
49
|
-
import lemonade.tools.server.llamacpp as llamacpp
|
|
50
|
-
from lemonade.tools.server.pydantic_models import (
|
|
49
|
+
from lemonade_server.pydantic_models import (
|
|
51
50
|
DEFAULT_MAX_NEW_TOKENS,
|
|
52
51
|
LoadConfig,
|
|
53
52
|
CompletionRequest,
|
|
@@ -55,9 +54,11 @@ from lemonade.tools.server.pydantic_models import (
|
|
|
55
54
|
ResponsesRequest,
|
|
56
55
|
PullConfig,
|
|
57
56
|
)
|
|
57
|
+
from lemonade.tools.management_tools import ManagementTool
|
|
58
|
+
import lemonade.tools.server.llamacpp as llamacpp
|
|
58
59
|
from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
|
|
59
60
|
from lemonade.tools.server.instructions import get_instructions_html
|
|
60
|
-
|
|
61
|
+
from lemonade.tools.server.port_utils import lifespan
|
|
61
62
|
|
|
62
63
|
DEFAULT_PORT = 8000
|
|
63
64
|
DEFAULT_LOG_LEVEL = "info"
|
|
@@ -243,15 +244,22 @@ class Server(ManagementTool):
|
|
|
243
244
|
|
|
244
245
|
return parser
|
|
245
246
|
|
|
246
|
-
def run(
|
|
247
|
+
def _setup_server_common(
|
|
247
248
|
self,
|
|
248
|
-
|
|
249
|
-
# we always use the default cache directory
|
|
250
|
-
_=None,
|
|
251
|
-
port: int = DEFAULT_PORT,
|
|
252
|
-
log_level: str = DEFAULT_LOG_LEVEL,
|
|
249
|
+
port: int,
|
|
253
250
|
truncate_inputs: bool = False,
|
|
251
|
+
log_level: str = DEFAULT_LOG_LEVEL,
|
|
252
|
+
threaded_mode: bool = False,
|
|
254
253
|
):
|
|
254
|
+
"""
|
|
255
|
+
Common setup logic shared between run() and run_in_thread().
|
|
256
|
+
|
|
257
|
+
Args:
|
|
258
|
+
port: Port number for the server
|
|
259
|
+
truncate_inputs: Whether to truncate inputs if they exceed max length
|
|
260
|
+
log_level: Logging level to configure
|
|
261
|
+
threaded_mode: Whether this is being set up for threaded execution
|
|
262
|
+
"""
|
|
255
263
|
# Store truncation settings
|
|
256
264
|
self.truncate_inputs = truncate_inputs
|
|
257
265
|
|
|
@@ -265,22 +273,27 @@ class Server(ManagementTool):
|
|
|
265
273
|
|
|
266
274
|
logging.trace = trace
|
|
267
275
|
|
|
268
|
-
# Configure logging
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
276
|
+
# Configure logging based on mode
|
|
277
|
+
if threaded_mode:
|
|
278
|
+
# Configure logging for warning level (to reduce noise in threaded execution)
|
|
279
|
+
logging.getLogger("uvicorn.error").setLevel(logging.WARNING)
|
|
280
|
+
else:
|
|
281
|
+
# Configure logging to match uvicorn's format
|
|
282
|
+
logging_level = getattr(logging, log_level.upper())
|
|
283
|
+
logging.basicConfig(
|
|
284
|
+
level=logging_level,
|
|
285
|
+
format="%(levelprefix)s %(message)s",
|
|
286
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
287
|
+
)
|
|
275
288
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
289
|
+
# Add uvicorn's log formatter
|
|
290
|
+
logging.root.handlers[0].formatter = uvicorn.logging.DefaultFormatter(
|
|
291
|
+
fmt="%(levelprefix)s %(message)s",
|
|
292
|
+
use_colors=True,
|
|
293
|
+
)
|
|
281
294
|
|
|
282
|
-
|
|
283
|
-
|
|
295
|
+
# Ensure the log level is properly set
|
|
296
|
+
logging.getLogger().setLevel(logging_level)
|
|
284
297
|
|
|
285
298
|
# Update debug logging state after setting log level
|
|
286
299
|
self.debug_logging_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)
|
|
@@ -293,8 +306,62 @@ class Server(ManagementTool):
|
|
|
293
306
|
# that the lifespan can access it
|
|
294
307
|
self.app.port = port
|
|
295
308
|
|
|
309
|
+
def run(
|
|
310
|
+
self,
|
|
311
|
+
# ManagementTool has a required cache_dir arg, but
|
|
312
|
+
# we always use the default cache directory
|
|
313
|
+
_=None,
|
|
314
|
+
port: int = DEFAULT_PORT,
|
|
315
|
+
log_level: str = DEFAULT_LOG_LEVEL,
|
|
316
|
+
truncate_inputs: bool = False,
|
|
317
|
+
):
|
|
318
|
+
# Common setup
|
|
319
|
+
self._setup_server_common(
|
|
320
|
+
port=port,
|
|
321
|
+
truncate_inputs=truncate_inputs,
|
|
322
|
+
log_level=log_level,
|
|
323
|
+
threaded_mode=False,
|
|
324
|
+
)
|
|
325
|
+
|
|
296
326
|
uvicorn.run(self.app, host="localhost", port=port, log_level=log_level)
|
|
297
327
|
|
|
328
|
+
def run_in_thread(
|
|
329
|
+
self,
|
|
330
|
+
port: int = DEFAULT_PORT,
|
|
331
|
+
host: str = "localhost",
|
|
332
|
+
log_level: str = "warning",
|
|
333
|
+
truncate_inputs: bool = False,
|
|
334
|
+
):
|
|
335
|
+
"""
|
|
336
|
+
Set up the server for running in a thread.
|
|
337
|
+
Returns a uvicorn server instance that can be controlled externally.
|
|
338
|
+
"""
|
|
339
|
+
# Common setup
|
|
340
|
+
self._setup_server_common(
|
|
341
|
+
port=port,
|
|
342
|
+
truncate_inputs=truncate_inputs,
|
|
343
|
+
log_level=log_level,
|
|
344
|
+
threaded_mode=True,
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
class CustomServer(UvicornServer):
|
|
348
|
+
"""Custom Uvicorn server that can be properly shutdown from another thread"""
|
|
349
|
+
|
|
350
|
+
def install_signal_handlers(self):
|
|
351
|
+
pass
|
|
352
|
+
|
|
353
|
+
# Configure the server
|
|
354
|
+
config = Config(
|
|
355
|
+
app=self.app,
|
|
356
|
+
host=host,
|
|
357
|
+
port=port,
|
|
358
|
+
log_level=log_level,
|
|
359
|
+
log_config=None,
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
# Create and return the uvicorn server
|
|
363
|
+
return CustomServer(config=config)
|
|
364
|
+
|
|
298
365
|
async def _show_telemetry(self):
|
|
299
366
|
"""
|
|
300
367
|
Show telemetry data in debug mode.
|
|
@@ -1133,7 +1200,7 @@ class Server(ManagementTool):
|
|
|
1133
1200
|
# We will populate a LoadConfig that has all of the required fields
|
|
1134
1201
|
config_to_use: LoadConfig
|
|
1135
1202
|
|
|
1136
|
-
# First,
|
|
1203
|
+
# First, ensure that the arguments are valid
|
|
1137
1204
|
if config.model_name:
|
|
1138
1205
|
# Get the dictionary of supported model from disk
|
|
1139
1206
|
supported_models = ModelManager().supported_models
|
|
@@ -1226,7 +1293,7 @@ class Server(ManagementTool):
|
|
|
1226
1293
|
try:
|
|
1227
1294
|
if config_to_use.recipe == "llamacpp":
|
|
1228
1295
|
self.llama_server_process = llamacpp.server_load(
|
|
1229
|
-
|
|
1296
|
+
model_config=config_to_use,
|
|
1230
1297
|
model_reference=model_reference,
|
|
1231
1298
|
telemetry=self.llama_telemetry,
|
|
1232
1299
|
)
|
|
@@ -1241,6 +1308,8 @@ class Server(ManagementTool):
|
|
|
1241
1308
|
"status": "success",
|
|
1242
1309
|
"message": f"Loaded model: {model_reference}",
|
|
1243
1310
|
}
|
|
1311
|
+
except HTTPException:
|
|
1312
|
+
raise
|
|
1244
1313
|
except Exception: # pylint: disable=broad-exception-caught
|
|
1245
1314
|
self.model_load_failure(model_reference)
|
|
1246
1315
|
|
|
@@ -1339,22 +1408,5 @@ class Server(ManagementTool):
|
|
|
1339
1408
|
return response
|
|
1340
1409
|
|
|
1341
1410
|
|
|
1342
|
-
@asynccontextmanager
|
|
1343
|
-
async def lifespan(app: FastAPI):
|
|
1344
|
-
# Code here will run when the application starts up
|
|
1345
|
-
|
|
1346
|
-
logging.info(
|
|
1347
|
-
"\n"
|
|
1348
|
-
"\n"
|
|
1349
|
-
"🍋 Lemonade Server Ready!\n"
|
|
1350
|
-
f"🍋 Open http://localhost:{app.port} in your browser for:\n"
|
|
1351
|
-
"🍋 💬 chat\n"
|
|
1352
|
-
"🍋 💻 model management\n"
|
|
1353
|
-
"🍋 📄 docs\n"
|
|
1354
|
-
)
|
|
1355
|
-
|
|
1356
|
-
yield
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
1411
|
# This file was originally licensed under Apache 2.0. It has been modified.
|
|
1360
1412
|
# Modifications Copyright (c) 2025 AMD
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Lemonade Server</title>
|
|
7
|
+
<link rel="icon" href="data:,">
|
|
8
|
+
<link rel="stylesheet" href="/static/styles.css">
|
|
9
|
+
<script>
|
|
10
|
+
window.SERVER_PORT = {{SERVER_PORT}};
|
|
11
|
+
</script>
|
|
12
|
+
{{SERVER_MODELS_JS}}
|
|
13
|
+
</head>
|
|
14
|
+
<body>
|
|
15
|
+
<nav class="navbar">
|
|
16
|
+
<a href="https://github.com/lemonade-sdk/lemonade">GitHub</a>
|
|
17
|
+
<a href="https://lemonade-server.ai/docs/">Docs</a>
|
|
18
|
+
<a href="https://lemonade-server.ai/docs/server/server_models/">Models</a>
|
|
19
|
+
<a href="https://lemonade-server.ai/docs/server/apps/">Featured Apps</a>
|
|
20
|
+
</nav>
|
|
21
|
+
<main class="main">
|
|
22
|
+
<div class="title">🍋 Lemonade Server</div>
|
|
23
|
+
<div class="tab-container">
|
|
24
|
+
<div class="tabs">
|
|
25
|
+
<button class="tab active" id="tab-chat" onclick="showTab('chat')">LLM Chat</button>
|
|
26
|
+
<button class="tab" id="tab-models" onclick="showTab('models')">Model Management</button>
|
|
27
|
+
</div>
|
|
28
|
+
<div class="tab-content active" id="content-chat">
|
|
29
|
+
<div class="chat-container">
|
|
30
|
+
<div class="chat-history" id="chat-history"></div>
|
|
31
|
+
<div class="chat-input-row">
|
|
32
|
+
<select id="model-select"></select>
|
|
33
|
+
<input type="text" id="chat-input" placeholder="Type your message..." />
|
|
34
|
+
<button id="send-btn">Send</button>
|
|
35
|
+
</div>
|
|
36
|
+
</div>
|
|
37
|
+
</div>
|
|
38
|
+
<div class="tab-content" id="content-models">
|
|
39
|
+
<div class="model-mgmt-container">
|
|
40
|
+
<div class="model-mgmt-pane">
|
|
41
|
+
<h3>Installed Models</h3>
|
|
42
|
+
<table class="model-table" id="installed-models-table">
|
|
43
|
+
<colgroup><col style="width:100%"></colgroup>
|
|
44
|
+
<tbody id="installed-models-tbody"></tbody>
|
|
45
|
+
</table>
|
|
46
|
+
</div>
|
|
47
|
+
<div class="model-mgmt-pane">
|
|
48
|
+
<h3>Suggested Models</h3>
|
|
49
|
+
<table class="model-table" id="suggested-models-table">
|
|
50
|
+
<tbody id="suggested-models-tbody"></tbody>
|
|
51
|
+
</table>
|
|
52
|
+
</div>
|
|
53
|
+
</div>
|
|
54
|
+
</div>
|
|
55
|
+
</div>
|
|
56
|
+
</main>
|
|
57
|
+
<footer class="site-footer">
|
|
58
|
+
<div class="dad-joke">When life gives you LLMs, make an LLM aide.</div>
|
|
59
|
+
<div class="copyright">Copyright 2025 AMD</div>
|
|
60
|
+
</footer>
|
|
61
|
+
<script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
|
|
62
|
+
<script>
|
|
63
|
+
// Tab switching logic
|
|
64
|
+
function showTab(tab) {
|
|
65
|
+
document.getElementById('tab-chat').classList.remove('active');
|
|
66
|
+
document.getElementById('tab-models').classList.remove('active');
|
|
67
|
+
document.getElementById('content-chat').classList.remove('active');
|
|
68
|
+
document.getElementById('content-models').classList.remove('active');
|
|
69
|
+
if (tab === 'chat') {
|
|
70
|
+
document.getElementById('tab-chat').classList.add('active');
|
|
71
|
+
document.getElementById('content-chat').classList.add('active');
|
|
72
|
+
} else {
|
|
73
|
+
document.getElementById('tab-models').classList.add('active');
|
|
74
|
+
document.getElementById('content-models').classList.add('active');
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Helper to get server base URL
|
|
79
|
+
function getServerBaseUrl() {
|
|
80
|
+
const port = window.SERVER_PORT || 8000;
|
|
81
|
+
return `http://localhost:${port}`;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Populate model dropdown from /api/v1/models endpoint
|
|
85
|
+
async function loadModels() {
|
|
86
|
+
try {
|
|
87
|
+
const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
|
|
88
|
+
const data = await resp.json();
|
|
89
|
+
const select = document.getElementById('model-select');
|
|
90
|
+
select.innerHTML = '';
|
|
91
|
+
if (!data.data || !Array.isArray(data.data)) {
|
|
92
|
+
select.innerHTML = '<option>No models found (malformed response)</option>';
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
95
|
+
if (data.data.length === 0) {
|
|
96
|
+
select.innerHTML = '<option>No models available</option>';
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
let defaultIndex = 0;
|
|
100
|
+
data.data.forEach(function(model, index) {
|
|
101
|
+
const modelId = model.id || model.name || model;
|
|
102
|
+
const opt = document.createElement('option');
|
|
103
|
+
opt.value = modelId;
|
|
104
|
+
opt.textContent = modelId;
|
|
105
|
+
if (modelId === 'Llama-3.2-1B-Instruct-Hybrid') {
|
|
106
|
+
defaultIndex = index;
|
|
107
|
+
}
|
|
108
|
+
select.appendChild(opt);
|
|
109
|
+
});
|
|
110
|
+
select.selectedIndex = defaultIndex;
|
|
111
|
+
} catch (e) {
|
|
112
|
+
const select = document.getElementById('model-select');
|
|
113
|
+
select.innerHTML = `<option>Error loading models: ${e.message}</option>`;
|
|
114
|
+
console.error('Error loading models:', e);
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
loadModels();
|
|
118
|
+
|
|
119
|
+
// Model Management Tab Logic
|
|
120
|
+
async function refreshModelMgmtUI() {
|
|
121
|
+
// Get installed models from /api/v1/models
|
|
122
|
+
let installed = [];
|
|
123
|
+
try {
|
|
124
|
+
const resp = await fetch(getServerBaseUrl() + '/api/v1/models');
|
|
125
|
+
const data = await resp.json();
|
|
126
|
+
if (data.data && Array.isArray(data.data)) {
|
|
127
|
+
installed = data.data.map(m => m.id || m.name || m);
|
|
128
|
+
}
|
|
129
|
+
} catch (e) {}
|
|
130
|
+
// All models from server_models.json (window.SERVER_MODELS)
|
|
131
|
+
const allModels = window.SERVER_MODELS || {};
|
|
132
|
+
// Filter suggested models not installed
|
|
133
|
+
const suggested = Object.keys(allModels).filter(
|
|
134
|
+
k => allModels[k].suggested && !installed.includes(k)
|
|
135
|
+
);
|
|
136
|
+
// Render installed models as a table (two columns, second is invisible)
|
|
137
|
+
const installedTbody = document.getElementById('installed-models-tbody');
|
|
138
|
+
installedTbody.innerHTML = '';
|
|
139
|
+
installed.forEach(function(mid) {
|
|
140
|
+
var tr = document.createElement('tr');
|
|
141
|
+
var tdName = document.createElement('td');
|
|
142
|
+
tdName.textContent = mid;
|
|
143
|
+
var tdEmpty = document.createElement('td');
|
|
144
|
+
tdEmpty.style.width = '0';
|
|
145
|
+
tdEmpty.style.padding = '0';
|
|
146
|
+
tdEmpty.style.border = 'none';
|
|
147
|
+
tr.appendChild(tdName);
|
|
148
|
+
tr.appendChild(tdEmpty);
|
|
149
|
+
installedTbody.appendChild(tr);
|
|
150
|
+
});
|
|
151
|
+
// Render suggested models as a table
|
|
152
|
+
const suggestedTbody = document.getElementById('suggested-models-tbody');
|
|
153
|
+
suggestedTbody.innerHTML = '';
|
|
154
|
+
suggested.forEach(mid => {
|
|
155
|
+
const tr = document.createElement('tr');
|
|
156
|
+
const tdName = document.createElement('td');
|
|
157
|
+
tdName.textContent = mid;
|
|
158
|
+
tdName.style.paddingRight = '1em';
|
|
159
|
+
tdName.style.verticalAlign = 'middle';
|
|
160
|
+
const tdBtn = document.createElement('td');
|
|
161
|
+
tdBtn.style.width = '1%';
|
|
162
|
+
tdBtn.style.verticalAlign = 'middle';
|
|
163
|
+
const btn = document.createElement('button');
|
|
164
|
+
btn.textContent = '+';
|
|
165
|
+
btn.title = 'Install model';
|
|
166
|
+
btn.onclick = async function() {
|
|
167
|
+
btn.disabled = true;
|
|
168
|
+
btn.textContent = 'Installing...';
|
|
169
|
+
btn.classList.add('installing-btn');
|
|
170
|
+
try {
|
|
171
|
+
await fetch(getServerBaseUrl() + '/api/v1/pull', {
|
|
172
|
+
method: 'POST',
|
|
173
|
+
headers: { 'Content-Type': 'application/json' },
|
|
174
|
+
body: JSON.stringify({ model_name: mid })
|
|
175
|
+
});
|
|
176
|
+
await refreshModelMgmtUI();
|
|
177
|
+
await loadModels(); // update chat dropdown too
|
|
178
|
+
} catch (e) {
|
|
179
|
+
btn.textContent = 'Error';
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
tdBtn.appendChild(btn);
|
|
183
|
+
tr.appendChild(tdName);
|
|
184
|
+
tr.appendChild(tdBtn);
|
|
185
|
+
suggestedTbody.appendChild(tr);
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
// Initial load
|
|
189
|
+
refreshModelMgmtUI();
|
|
190
|
+
// Optionally, refresh when switching to the tab
|
|
191
|
+
document.getElementById('tab-models').addEventListener('click', refreshModelMgmtUI);
|
|
192
|
+
|
|
193
|
+
// Chat logic (streaming with OpenAI JS client placeholder)
|
|
194
|
+
const chatHistory = document.getElementById('chat-history');
|
|
195
|
+
const chatInput = document.getElementById('chat-input');
|
|
196
|
+
const sendBtn = document.getElementById('send-btn');
|
|
197
|
+
const modelSelect = document.getElementById('model-select');
|
|
198
|
+
let messages = [];
|
|
199
|
+
|
|
200
|
+
function appendMessage(role, text) {
|
|
201
|
+
const div = document.createElement('div');
|
|
202
|
+
div.className = 'chat-message ' + role;
|
|
203
|
+
// Add a bubble for iMessage style
|
|
204
|
+
const bubble = document.createElement('div');
|
|
205
|
+
bubble.className = 'chat-bubble ' + role;
|
|
206
|
+
bubble.innerHTML = text;
|
|
207
|
+
div.appendChild(bubble);
|
|
208
|
+
chatHistory.appendChild(div);
|
|
209
|
+
chatHistory.scrollTop = chatHistory.scrollHeight;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
async function sendMessage() {
|
|
213
|
+
const text = chatInput.value.trim();
|
|
214
|
+
if (!text) return;
|
|
215
|
+
appendMessage('user', text);
|
|
216
|
+
messages.push({ role: 'user', content: text });
|
|
217
|
+
chatInput.value = '';
|
|
218
|
+
sendBtn.disabled = true;
|
|
219
|
+
// Streaming OpenAI completions (placeholder, adapt as needed)
|
|
220
|
+
let llmText = '';
|
|
221
|
+
appendMessage('llm', '...');
|
|
222
|
+
const llmDiv = chatHistory.lastChild.querySelector('.chat-bubble.llm');
|
|
223
|
+
try {
|
|
224
|
+
// Use the correct endpoint for chat completions
|
|
225
|
+
const resp = await fetch(getServerBaseUrl() + '/api/v1/chat/completions', {
|
|
226
|
+
method: 'POST',
|
|
227
|
+
headers: { 'Content-Type': 'application/json' },
|
|
228
|
+
body: JSON.stringify({
|
|
229
|
+
model: modelSelect.value,
|
|
230
|
+
messages: messages,
|
|
231
|
+
stream: true
|
|
232
|
+
})
|
|
233
|
+
});
|
|
234
|
+
if (!resp.body) throw new Error('No stream');
|
|
235
|
+
const reader = resp.body.getReader();
|
|
236
|
+
let decoder = new TextDecoder();
|
|
237
|
+
llmDiv.textContent = '';
|
|
238
|
+
while (true) {
|
|
239
|
+
const { done, value } = await reader.read();
|
|
240
|
+
if (done) break;
|
|
241
|
+
const chunk = decoder.decode(value);
|
|
242
|
+
if (chunk.trim() === 'data: [DONE]' || chunk.trim() === '[DONE]') continue;
|
|
243
|
+
// Try to extract the content from the OpenAI chunk
|
|
244
|
+
const match = chunk.match(/"content"\s*:\s*"([^"]*)"/);
|
|
245
|
+
if (match && match[1]) {
|
|
246
|
+
llmText += match[1];
|
|
247
|
+
llmDiv.textContent = llmText;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
messages.push({ role: 'assistant', content: llmText });
|
|
251
|
+
} catch (e) {
|
|
252
|
+
llmDiv.textContent = '[Error: ' + e.message + ']';
|
|
253
|
+
}
|
|
254
|
+
sendBtn.disabled = false;
|
|
255
|
+
}
|
|
256
|
+
sendBtn.onclick = sendMessage;
|
|
257
|
+
chatInput.addEventListener('keydown', function(e) {
|
|
258
|
+
if (e.key === 'Enter') sendMessage();
|
|
259
|
+
});
|
|
260
|
+
</script>
|
|
261
|
+
</body>
|
|
262
|
+
</html>
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import logging
|
|
3
|
+
from lemonade.tools.server.serve import Server
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ServerRunner(threading.Thread):
|
|
7
|
+
"""
|
|
8
|
+
Thread class for running the Lemonade Server with a loaded model.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
def __init__(
|
|
12
|
+
self, model, tokenizer, checkpoint, recipe, host="localhost", port=8000
|
|
13
|
+
):
|
|
14
|
+
threading.Thread.__init__(self)
|
|
15
|
+
self.model = model
|
|
16
|
+
self.tokenizer = tokenizer
|
|
17
|
+
self.checkpoint = checkpoint
|
|
18
|
+
self.recipe = recipe
|
|
19
|
+
self.host = host
|
|
20
|
+
self.port = port
|
|
21
|
+
self.server = None
|
|
22
|
+
self.ready_event = threading.Event()
|
|
23
|
+
self.shutdown_event = threading.Event()
|
|
24
|
+
self.uvicorn_server = None
|
|
25
|
+
|
|
26
|
+
def run(self):
|
|
27
|
+
try:
|
|
28
|
+
# Create the server instance
|
|
29
|
+
self.server = Server()
|
|
30
|
+
|
|
31
|
+
# Configure the server with model/tokenizer
|
|
32
|
+
self.server.model = self.model
|
|
33
|
+
self.server.tokenizer = self.tokenizer
|
|
34
|
+
self.server.llm_loaded = type(
|
|
35
|
+
"obj",
|
|
36
|
+
(object,),
|
|
37
|
+
{
|
|
38
|
+
"checkpoint": self.checkpoint,
|
|
39
|
+
"recipe": self.recipe,
|
|
40
|
+
"max_prompt_length": None,
|
|
41
|
+
"reasoning": False,
|
|
42
|
+
"model_name": "custom",
|
|
43
|
+
},
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Set up the server for threaded execution
|
|
47
|
+
self.uvicorn_server = self.server.run_in_thread(
|
|
48
|
+
port=self.port, host=self.host, log_level="warning"
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# Set the ready event
|
|
52
|
+
self.ready_event.set()
|
|
53
|
+
|
|
54
|
+
# Run the server until shutdown is requested
|
|
55
|
+
logging.info(f"Starting server on http://{self.host}:{self.port}")
|
|
56
|
+
self.uvicorn_server.run()
|
|
57
|
+
|
|
58
|
+
except Exception as e:
|
|
59
|
+
logging.error(f"Error starting server: {e}")
|
|
60
|
+
self.ready_event.set()
|
|
61
|
+
raise
|
|
62
|
+
|
|
63
|
+
def shutdown(self):
|
|
64
|
+
"""Shutdown the server"""
|
|
65
|
+
if hasattr(self, "uvicorn_server") and self.uvicorn_server:
|
|
66
|
+
logging.info("Shutting down server...")
|
|
67
|
+
self.uvicorn_server.should_exit = True
|
|
68
|
+
self.shutdown_event.set()
|
|
69
|
+
|
|
70
|
+
# Clean up resources properly to avoid memory leaks
|
|
71
|
+
if hasattr(self, "server") and self.server:
|
|
72
|
+
logging.info("Cleaning up model and tokenizer resources...")
|
|
73
|
+
|
|
74
|
+
if hasattr(self.server, "model"):
|
|
75
|
+
self.server.model = None
|
|
76
|
+
|
|
77
|
+
if hasattr(self.server, "tokenizer"):
|
|
78
|
+
self.server.tokenizer = None
|
|
79
|
+
|
|
80
|
+
if hasattr(self.server, "llm_loaded"):
|
|
81
|
+
self.server.llm_loaded = None
|
|
82
|
+
|
|
83
|
+
# Clean up local references
|
|
84
|
+
if hasattr(self, "model"):
|
|
85
|
+
del self.model
|
|
86
|
+
if hasattr(self, "tokenizer"):
|
|
87
|
+
del self.tokenizer
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "7.0.1"
|
|
1
|
+
__version__ = "7.0.3"
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
|
|
2
2
|
lemonade/api.py,sha256=9apNWSMS4bYpYl7iqDA4CsHHOOMdjOIuJhNYSqj_jIA,3878
|
|
3
3
|
lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
|
|
4
|
-
lemonade/cli.py,sha256=
|
|
4
|
+
lemonade/cli.py,sha256=ddN2QqsGMsVwydfcR7MSZu1z8_-bUgUP7dhw9lzbHa8,4424
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=Ur-fY8dgd79WuOM208uDSw5amQiSzM7VmTbWPLQBZvw,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/analyze_model.py,sha256=sYWDznEUEWjx_Qekg7f1hHY4Pfe87IQ77lmsWqePgE0,803
|
|
10
10
|
lemonade/common/build.py,sha256=Pk86mCr6fyBIx2zXDpq0BkdahlCmWRnwSTpShA_gwZw,7849
|
|
@@ -23,20 +23,21 @@ lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOf
|
|
|
23
23
|
lemonade/profilers/memory_tracker.py,sha256=-SSBmNlrweiX59wyNtLMWiwaMOskBzNO1_cufVwteqs,9357
|
|
24
24
|
lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
|
|
25
25
|
lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
|
|
26
|
+
lemonade/tools/accuracy.py,sha256=QndammQ1bmlTaF_6YDaaiJp6fpkKZDYGySdQpAgZIp8,11699
|
|
26
27
|
lemonade/tools/adapter.py,sha256=4H6gfbjvqyU6qm1_-b2FE-c3a7N9OzEBeDVnIwqRDvg,3014
|
|
27
28
|
lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
|
|
28
29
|
lemonade/tools/huggingface_bench.py,sha256=POE5JYzArK2FBktazOkluLNFzlLctM39B19fK5sMx-0,10017
|
|
29
|
-
lemonade/tools/huggingface_load.py,sha256=
|
|
30
|
+
lemonade/tools/huggingface_load.py,sha256=857GxaQcqmSv2DSsMh503aSicwQDQg5wGGlpwehHHrg,18868
|
|
30
31
|
lemonade/tools/humaneval.py,sha256=RCkVR-yOL56T4EyURaU3MR3yhU4NCbeuWHDyhVWZtxw,9502
|
|
31
32
|
lemonade/tools/llamacpp.py,sha256=uv-xv5KfHm0eU1I6vEKuaRC-QpilE1FffVA-zoCvHt4,8659
|
|
32
33
|
lemonade/tools/llamacpp_bench.py,sha256=tZamG-1Z5pG_bD4O4yz2mUo2AWwEgOw9RSdEDllW4HY,5941
|
|
33
34
|
lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
|
|
34
35
|
lemonade/tools/mmlu.py,sha256=hNa7A8dhpjOtgfd5MGcagpwpw4_AZRZvVj5Duz9LJ88,11381
|
|
35
36
|
lemonade/tools/perplexity.py,sha256=Z6ha7LS5DhdZWHZxhDz8mDnfESbTGc6TGo8KnPjRmiE,5606
|
|
36
|
-
lemonade/tools/prompt.py,sha256=
|
|
37
|
+
lemonade/tools/prompt.py,sha256=AhRdWpx5BVnuJTmCsxSCw_oKHRlTiRLmOkriXon_mLE,8629
|
|
37
38
|
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
38
39
|
lemonade/tools/ort_genai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
lemonade/tools/ort_genai/oga.py,sha256
|
|
40
|
+
lemonade/tools/ort_genai/oga.py,sha256=dZ6kbwHBVfzTujAG0ojYDhjS8uH6kwW5xZTcu20hFIc,43886
|
|
40
41
|
lemonade/tools/ort_genai/oga_bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
|
|
41
42
|
lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
43
|
lemonade/tools/quark/quark_load.py,sha256=QWzhXP8MehgD_KjnsmN5a-3D5kdI2XZtKTH4HoDoFoo,5572
|
|
@@ -45,21 +46,24 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
45
46
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
46
47
|
lemonade/tools/report/table.py,sha256=a0TXo1X84RxCSu0un_XM3ANOlhLtPDuqtGwR7eomf2s,24853
|
|
47
48
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
lemonade/tools/server/instructions.py,sha256=
|
|
49
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
50
|
-
lemonade/tools/server/
|
|
51
|
-
lemonade/tools/server/serve.py,sha256=
|
|
49
|
+
lemonade/tools/server/instructions.py,sha256=PbQ8HItagIWbJLYf2IVPhthYVi1E878vNdS42qmTc3E,1230
|
|
50
|
+
lemonade/tools/server/llamacpp.py,sha256=YqUzx-TmyvWMrZfue7xURFfgTRLPGGSzNJtF9GERC_8,10184
|
|
51
|
+
lemonade/tools/server/port_utils.py,sha256=24Ryz5cNU0R9L1kuVSapZoyXTZHzhF4y0Yje9MVOrE0,1535
|
|
52
|
+
lemonade/tools/server/serve.py,sha256=O2ZcM1xogIRAqBE49tQ-gTFpEXExlwHOT3bYL1rZgmc,52483
|
|
53
|
+
lemonade/tools/server/thread_utils.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
52
54
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
55
|
+
lemonade/tools/server/static/instructions.html,sha256=tCkc55LrI4oWQM2VYuK3_m02MvG5XxIcTbCSgxyTAIU,11257
|
|
53
56
|
lemonade/tools/server/static/styles.css,sha256=8U1EejQaqRLQ6QTCF5UG_dLPtLjRwT1menUHMDhaq2M,5045
|
|
54
57
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
55
58
|
lemonade_install/install.py,sha256=61qUO7kWCLcdjK0_IQZ46-rKP_AWkyznh4YpDclPKyM,28036
|
|
56
|
-
lemonade_sdk-7.0.
|
|
57
|
-
lemonade_sdk-7.0.
|
|
59
|
+
lemonade_sdk-7.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
60
|
+
lemonade_sdk-7.0.3.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
58
61
|
lemonade_server/cli.py,sha256=DR6sIt66K1sZZG3ascEw_6HUgz3UhU9KGUyzxf4nO_A,7351
|
|
59
|
-
lemonade_server/model_manager.py,sha256
|
|
60
|
-
lemonade_server/
|
|
61
|
-
|
|
62
|
-
lemonade_sdk-7.0.
|
|
63
|
-
lemonade_sdk-7.0.
|
|
64
|
-
lemonade_sdk-7.0.
|
|
65
|
-
lemonade_sdk-7.0.
|
|
62
|
+
lemonade_server/model_manager.py,sha256=-r9JS_fPcoLCQCFKZfkInBIIgT4F1tQ_EIKqMqNYpqM,5546
|
|
63
|
+
lemonade_server/pydantic_models.py,sha256=pdOZW6nAYKWKllMLR7y5wdbIofIznxe5Vehac0Hgqto,2276
|
|
64
|
+
lemonade_server/server_models.json,sha256=3C-lJ2lsNwdy0AKT_US_lcVOoiF3xmadbiOUeOQuJXA,6927
|
|
65
|
+
lemonade_sdk-7.0.3.dist-info/METADATA,sha256=pSSPTu7kUyAh4W8lCVvxS-WAnjMT9Dsyw0r0WHcrxgA,5443
|
|
66
|
+
lemonade_sdk-7.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
67
|
+
lemonade_sdk-7.0.3.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
68
|
+
lemonade_sdk-7.0.3.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
69
|
+
lemonade_sdk-7.0.3.dist-info/RECORD,,
|