lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cache.py +6 -1
- lemonade/cli.py +47 -5
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/status.py +4 -4
- lemonade/common/system_info.py +544 -1
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/tools/accuracy.py +143 -48
- lemonade/tools/adapter.py +6 -1
- lemonade/tools/bench.py +26 -8
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +303 -0
- lemonade/tools/huggingface/bench.py +6 -1
- lemonade/tools/llamacpp/bench.py +146 -27
- lemonade/tools/llamacpp/load.py +30 -2
- lemonade/tools/llamacpp/utils.py +393 -33
- lemonade/tools/oga/bench.py +5 -26
- lemonade/tools/oga/load.py +60 -121
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/report/table.py +76 -8
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +220 -553
- lemonade/tools/server/serve.py +684 -168
- lemonade/tools/server/static/js/chat.js +666 -342
- lemonade/tools/server/static/js/model-settings.js +24 -3
- lemonade/tools/server/static/js/models.js +597 -73
- lemonade/tools/server/static/js/shared.js +79 -14
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +491 -66
- lemonade/tools/server/static/webapp.html +83 -31
- lemonade/tools/server/tray.py +158 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +54 -611
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
- lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
- lemonade_server/cli.py +145 -37
- lemonade_server/model_manager.py +521 -37
- lemonade_server/pydantic_models.py +28 -1
- lemonade_server/server_models.json +246 -92
- lemonade_server/settings.py +39 -39
- lemonade/tools/quark/__init__.py +0 -0
- lemonade/tools/quark/quark_load.py +0 -173
- lemonade/tools/quark/quark_quantize.py +0 -439
- lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Migration utilities for handling RyzenAI version upgrades.
|
|
3
|
+
|
|
4
|
+
This module provides functionality to detect and clean up incompatible RyzenAI models
|
|
5
|
+
when upgrading between major versions (e.g., 1.4/1.5 -> 1.6).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
import shutil
|
|
11
|
+
import logging
|
|
12
|
+
from typing import List, Dict, Optional, Tuple
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_directory_size(path: str) -> int:
    """
    Return the cumulative size, in bytes, of every file under *path*.

    Files or directories that disappear or cannot be accessed while the
    tree is being walked are silently skipped.

    Args:
        path: Root directory to measure.

    Returns:
        Sum of file sizes in bytes (0 if nothing is readable).
    """
    size_accumulator = 0
    try:
        for current_dir, _, file_names in os.walk(path):
            for name in file_names:
                full_path = os.path.join(current_dir, name)
                try:
                    size_accumulator += os.path.getsize(full_path)
                except (OSError, FileNotFoundError):
                    # File vanished or is unreadable; ignore it.
                    continue
    except (OSError, FileNotFoundError):
        # Root itself is inaccessible; report whatever was counted so far.
        pass
    return size_accumulator
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def format_size(size_bytes: int) -> str:
    """
    Render a byte count as a human-readable string.

    The value is successively divided by 1024 and labeled B, KB, or MB;
    anything at or above 1024 MB is reported in GB.

    Args:
        size_bytes: Size in bytes.

    Returns:
        Formatted string (e.g., "1.5 GB", "450.0 MB").
    """
    remaining = size_bytes
    for label in ("B", "KB", "MB"):
        if remaining < 1024.0:
            return f"{remaining:.1f} {label}"
        remaining /= 1024.0
    # Everything 1 GB and above is expressed in GB, however large.
    return f"{remaining:.1f} GB"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def check_rai_config_version(model_path: str, required_version: str = "1.6.0") -> bool:
    """
    Determine whether a model directory's rai_config.json advertises
    support for *required_version*.

    The check is deliberately permissive: anything that is not a RyzenAI
    model (no rai_config.json), uses an unrecognized config format, or
    cannot be read at all is treated as compatible, so models are never
    deleted on a false positive.

    Args:
        model_path: Path to the model directory.
        required_version: Version string to check for (default: "1.6.0").

    Returns:
        True if the model is considered compatible, False otherwise.
    """
    rai_config_path = os.path.join(model_path, "rai_config.json")

    # No rai_config.json means this is not a RyzenAI model.
    if not os.path.exists(rai_config_path):
        return True

    try:
        with open(rai_config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        logging.warning(f"Could not read rai_config.json from {model_path}: {e}")
        # If we can't read it, assume it's compatible to avoid false positives
        return True

    max_prompt_length = config.get("max_prompt_length")
    if isinstance(max_prompt_length, dict):
        # Versioned format: compatible only when the required version is a key.
        return required_version in max_prompt_length

    # Missing key or unexpected format: err on the side of keeping the model.
    return True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def scan_oga_models_cache(cache_dir: str) -> List[Dict[str, any]]:
    """
    Find incompatible RyzenAI models inside the Lemonade OGA models cache.

    The layout scanned is ``<cache_dir>/oga_models/<model>/<variant>`` where
    each variant directory (e.g. npu-int4, hybrid-int4) is vetted via its
    rai_config.json.

    Args:
        cache_dir: Path to the Lemonade cache directory.

    Returns:
        List of dicts describing each incompatible model
        (path, name, size, size_formatted, cache_type).
    """
    oga_root = os.path.join(cache_dir, "oga_models")
    found: List[Dict[str, any]] = []

    if not os.path.exists(oga_root):
        return found

    try:
        for model_name in os.listdir(oga_root):
            model_dir = os.path.join(oga_root, model_name)
            if not os.path.isdir(model_dir):
                continue

            # Each variant subdirectory (device/dtype combo) is checked
            # independently.
            for variant in os.listdir(model_dir):
                variant_dir = os.path.join(model_dir, variant)
                if not os.path.isdir(variant_dir):
                    continue
                if check_rai_config_version(variant_dir):
                    continue

                variant_size = get_directory_size(variant_dir)
                found.append(
                    {
                        "path": variant_dir,
                        "name": f"{model_name}/{variant}",
                        "size": variant_size,
                        "size_formatted": format_size(variant_size),
                        "cache_type": "lemonade",
                    }
                )
    except (OSError, PermissionError) as e:
        logging.warning(f"Error scanning oga_models cache: {e}")

    return found
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def scan_huggingface_cache(hf_home: Optional[str] = None) -> List[Dict[str, any]]:
    """
    Find incompatible RyzenAI models inside the HuggingFace hub cache.

    Each ``models--{org}--{repo}`` entry is inspected snapshot by snapshot;
    as soon as one incompatible snapshot is found, the entire model
    directory is reported and the remaining snapshots are skipped.

    Args:
        hf_home: HuggingFace home directory. Defaults to $HF_HOME or
            ~/.cache/huggingface when not provided.

    Returns:
        List of dicts describing each incompatible model
        (path, name, size, size_formatted, cache_type).
    """
    if hf_home is None:
        hf_home = os.environ.get(
            "HF_HOME", os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
        )

    hub_root = os.path.join(hf_home, "hub")
    found: List[Dict[str, any]] = []

    if not os.path.exists(hub_root):
        return found

    try:
        for entry in os.listdir(hub_root):
            # Only model repositories are relevant.
            if not entry.startswith("models--"):
                continue

            model_dir = os.path.join(hub_root, entry)
            if not os.path.isdir(model_dir):
                continue

            snapshots_dir = os.path.join(model_dir, "snapshots")
            if not os.path.exists(snapshots_dir):
                continue

            for snapshot_hash in os.listdir(snapshots_dir):
                snapshot_path = os.path.join(snapshots_dir, snapshot_hash)
                if not os.path.isdir(snapshot_path):
                    continue
                if check_rai_config_version(snapshot_path):
                    continue

                # One bad snapshot condemns the whole model directory.
                readable_name = entry.replace("models--", "").replace("--", "/")
                dir_size = get_directory_size(model_dir)
                found.append(
                    {
                        "path": model_dir,
                        "name": readable_name,
                        "size": dir_size,
                        "size_formatted": format_size(dir_size),
                        "cache_type": "huggingface",
                    }
                )
                break
    except (OSError, PermissionError) as e:
        logging.warning(f"Error scanning HuggingFace cache: {e}")

    return found
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def detect_incompatible_ryzenai_models(
    cache_dir: str, hf_home: Optional[str] = None
) -> Tuple[List[Dict[str, any]], int]:
    """
    Gather incompatible RyzenAI models from both caches.

    Combines the results of scanning the Lemonade OGA cache and the
    HuggingFace hub cache, and logs a one-line summary of the findings.

    Args:
        cache_dir: Path to the Lemonade cache directory.
        hf_home: Path to HuggingFace home directory (optional).

    Returns:
        Tuple of (list of incompatible models, total size in bytes).
    """
    # Concatenate the findings from both cache locations.
    all_models = [
        *scan_oga_models_cache(cache_dir),
        *scan_huggingface_cache(hf_home),
    ]

    combined_size = sum(entry["size"] for entry in all_models)

    logging.info(
        f"Found {len(all_models)} incompatible RyzenAI models "
        f"({format_size(combined_size)} total)"
    )

    return all_models, combined_size
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def delete_model_directory(model_path: str) -> bool:
    """
    Remove a model directory tree from disk.

    Args:
        model_path: Path to the model directory to delete.

    Returns:
        True when the directory existed and was removed, False when it was
        missing or the deletion failed.
    """
    if not os.path.exists(model_path):
        logging.warning(f"Model directory not found: {model_path}")
        return False
    try:
        shutil.rmtree(model_path)
    except (OSError, PermissionError) as e:
        logging.error(f"Failed to delete model directory {model_path}: {e}")
        return False
    logging.info(f"Deleted model directory: {model_path}")
    return True
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _extract_checkpoint_from_path(path: str) -> Optional[str]:
|
|
270
|
+
"""
|
|
271
|
+
Extract the checkpoint name from a model path.
|
|
272
|
+
|
|
273
|
+
Args:
|
|
274
|
+
path: Model directory path (either Lemonade cache or HuggingFace cache)
|
|
275
|
+
|
|
276
|
+
Returns:
|
|
277
|
+
Checkpoint name (e.g., "amd/Qwen2.5-1.5B-Instruct-awq") or None if not extractable
|
|
278
|
+
"""
|
|
279
|
+
# Normalize path separators to handle both Unix and Windows paths
|
|
280
|
+
normalized_path = path.replace("\\", "/")
|
|
281
|
+
parts = normalized_path.split("/")
|
|
282
|
+
|
|
283
|
+
# Handle HuggingFace cache paths: models--{org}--{repo}
|
|
284
|
+
if "models--" in normalized_path:
|
|
285
|
+
for part in parts:
|
|
286
|
+
if part.startswith("models--"):
|
|
287
|
+
# Convert models--org--repo to org/repo
|
|
288
|
+
# Replace first two occurrences of -- with /
|
|
289
|
+
checkpoint = part.replace("models--", "", 1).replace("--", "/", 1)
|
|
290
|
+
return checkpoint
|
|
291
|
+
return None
|
|
292
|
+
|
|
293
|
+
# Handle Lemonade cache paths: oga_models/{model_name}/{device}-{dtype}
|
|
294
|
+
if "oga_models" in normalized_path:
|
|
295
|
+
try:
|
|
296
|
+
oga_models_idx = parts.index("oga_models")
|
|
297
|
+
if oga_models_idx + 1 < len(parts):
|
|
298
|
+
model_name = parts[oga_models_idx + 1]
|
|
299
|
+
# Convert model_name back to checkpoint (e.g., amd_model -> amd/model)
|
|
300
|
+
# This is a heuristic - we look for the pattern {org}_{model}
|
|
301
|
+
checkpoint = model_name.replace("_", "/", 1)
|
|
302
|
+
return checkpoint
|
|
303
|
+
except (ValueError, IndexError):
|
|
304
|
+
return None
|
|
305
|
+
|
|
306
|
+
return None
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _cleanup_user_models_json(deleted_checkpoints: List[str], user_models_file: str):
|
|
310
|
+
"""
|
|
311
|
+
Remove entries from user_models.json for models that have been deleted.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
deleted_checkpoints: List of checkpoint names that were deleted
|
|
315
|
+
user_models_file: Path to user_models.json
|
|
316
|
+
"""
|
|
317
|
+
if not deleted_checkpoints or not os.path.exists(user_models_file):
|
|
318
|
+
return
|
|
319
|
+
|
|
320
|
+
try:
|
|
321
|
+
with open(user_models_file, "r", encoding="utf-8") as f:
|
|
322
|
+
user_models = json.load(f)
|
|
323
|
+
|
|
324
|
+
# Track which models to remove
|
|
325
|
+
models_to_remove = []
|
|
326
|
+
for model_name, model_info in user_models.items():
|
|
327
|
+
checkpoint = model_info.get("checkpoint", "")
|
|
328
|
+
# Check if this checkpoint matches any deleted checkpoints
|
|
329
|
+
# We do a case-insensitive comparison since paths may have been lowercased
|
|
330
|
+
for deleted_checkpoint in deleted_checkpoints:
|
|
331
|
+
if checkpoint.lower() == deleted_checkpoint.lower():
|
|
332
|
+
models_to_remove.append(model_name)
|
|
333
|
+
break
|
|
334
|
+
|
|
335
|
+
# Remove the models
|
|
336
|
+
for model_name in models_to_remove:
|
|
337
|
+
del user_models[model_name]
|
|
338
|
+
logging.info(f"Removed {model_name} from user_models.json")
|
|
339
|
+
|
|
340
|
+
# Save the updated file only if we removed something
|
|
341
|
+
if models_to_remove:
|
|
342
|
+
with open(user_models_file, "w", encoding="utf-8") as f:
|
|
343
|
+
json.dump(user_models, f, indent=2)
|
|
344
|
+
logging.info(
|
|
345
|
+
f"Updated user_models.json - removed {len(models_to_remove)} entries"
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
except (json.JSONDecodeError, OSError) as e:
|
|
349
|
+
logging.warning(f"Could not update user_models.json: {e}")
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def delete_incompatible_models(
    model_paths: List[str], user_models_file: Optional[str] = None
) -> Dict[str, any]:
    """
    Delete a batch of incompatible model directories and prune any matching
    entries from user_models.json.

    Args:
        model_paths: List of paths to delete
        user_models_file: Path to user_models.json; when omitted, the file
            inside the default Lemonade cache directory is used.

    Returns:
        Dict with deletion results (success_count, failed_count, freed_size,
        freed_size_formatted, cleaned_user_models)
    """
    deleted = 0
    failed = 0
    bytes_freed = 0
    removed_checkpoints: List[str] = []

    for model_path in model_paths:
        # Size and checkpoint name must be captured before removal.
        path_size = get_directory_size(model_path)
        checkpoint = _extract_checkpoint_from_path(model_path)
        if checkpoint:
            removed_checkpoints.append(checkpoint)

        if delete_model_directory(model_path):
            deleted += 1
            bytes_freed += path_size
        else:
            failed += 1

    # Prune user_models.json whenever any checkpoint names were gathered.
    cleaned_user_models = False
    if removed_checkpoints:
        if user_models_file is None:
            from lemonade.cache import DEFAULT_CACHE_DIR

            user_models_file = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")

        _cleanup_user_models_json(removed_checkpoints, user_models_file)
        cleaned_user_models = True

    return {
        "success_count": deleted,
        "failed_count": failed,
        "freed_size": bytes_freed,
        "freed_size_formatted": format_size(bytes_freed),
        "cleaned_user_models": cleaned_user_models,
    }
|
lemonade/tools/report/table.py
CHANGED
|
@@ -48,6 +48,18 @@ def _merge_join(str1, str2) -> str:
|
|
|
48
48
|
return str1 + ("\n" if str1 and str2 else "") + str2
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _window_sum(data: list, n_windows: int) -> list:
|
|
52
|
+
"""Sums data into n_windows windows"""
|
|
53
|
+
if n_windows <= 0:
|
|
54
|
+
return data
|
|
55
|
+
window_size = max(1, len(data) // n_windows)
|
|
56
|
+
summed_data = []
|
|
57
|
+
for i in range(0, len(data), window_size):
|
|
58
|
+
window_sum = sum(data[i : i + window_size])
|
|
59
|
+
summed_data.append(window_sum)
|
|
60
|
+
return summed_data
|
|
61
|
+
|
|
62
|
+
|
|
51
63
|
################################################################################
|
|
52
64
|
# CLASSES THAT DESCRIBE TEXT TABLE COLUMNS
|
|
53
65
|
################################################################################
|
|
@@ -88,10 +100,54 @@ class SimpleStat(TableColumn):
|
|
|
88
100
|
if lean and self.omit_if_lean:
|
|
89
101
|
return None
|
|
90
102
|
data = build_stats.get(self.stat, None)
|
|
91
|
-
if data is None:
|
|
92
|
-
return ""
|
|
103
|
+
if data is None or (data == []):
|
|
104
|
+
return "-"
|
|
93
105
|
if self.stat_fn:
|
|
94
106
|
data = self.stat_fn(data)
|
|
107
|
+
cell_str = "\n".join(
|
|
108
|
+
[
|
|
109
|
+
_wrap("-" if x is None else f"{x:{self.format_str}}", self.wrap)
|
|
110
|
+
for x in _to_list(data)
|
|
111
|
+
]
|
|
112
|
+
)
|
|
113
|
+
return cell_str
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class DependentStat(TableColumn):
|
|
117
|
+
"""
|
|
118
|
+
These are for statistics already declared by the tool or basic build stats that
|
|
119
|
+
rely on one or more additional stats to compute their value. The dependency is
|
|
120
|
+
embodied by the stat_fn function.
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
def __init__(
|
|
124
|
+
self,
|
|
125
|
+
column_header,
|
|
126
|
+
stats,
|
|
127
|
+
format_str,
|
|
128
|
+
align="center",
|
|
129
|
+
omit_if_lean=False,
|
|
130
|
+
wrap=None,
|
|
131
|
+
stat_fn=None,
|
|
132
|
+
):
|
|
133
|
+
self.column_header = column_header
|
|
134
|
+
self.stats = stats
|
|
135
|
+
self.format_str = format_str
|
|
136
|
+
self.align = align
|
|
137
|
+
self.omit_if_lean = omit_if_lean
|
|
138
|
+
self.wrap = wrap or self.default_wrap
|
|
139
|
+
self.stat_fn = stat_fn
|
|
140
|
+
|
|
141
|
+
def get_str(self, build_stats, lean=False):
|
|
142
|
+
if lean and self.omit_if_lean:
|
|
143
|
+
return None
|
|
144
|
+
stats_data = [build_stats.get(stat, None) for stat in self.stats]
|
|
145
|
+
if self.stat_fn:
|
|
146
|
+
data = self.stat_fn(stats_data)
|
|
147
|
+
else:
|
|
148
|
+
data = stats_data[0]
|
|
149
|
+
if data is None or (data == []):
|
|
150
|
+
return "-"
|
|
95
151
|
cell_str = "\n".join(
|
|
96
152
|
[_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
|
|
97
153
|
)
|
|
@@ -434,10 +490,12 @@ class Table(ABC):
|
|
|
434
490
|
row = []
|
|
435
491
|
|
|
436
492
|
# First columns
|
|
493
|
+
first_columns_count = 0
|
|
437
494
|
for entry in first_columns:
|
|
438
495
|
entry_str = entry.get_str(build_stats, self.lean)
|
|
439
496
|
if entry_str is not None:
|
|
440
497
|
row.append(entry_str)
|
|
498
|
+
first_columns_count += 1
|
|
441
499
|
|
|
442
500
|
# Per tool columns
|
|
443
501
|
for tool in tools:
|
|
@@ -460,22 +518,24 @@ class Table(ABC):
|
|
|
460
518
|
row.append(entry_str)
|
|
461
519
|
|
|
462
520
|
# Final columns
|
|
521
|
+
last_columns_count = 0
|
|
463
522
|
for entry in last_columns:
|
|
464
523
|
entry_str = entry.get_str(build_stats, self.lean)
|
|
465
524
|
if entry_str is not None:
|
|
466
525
|
row.append(entry_str)
|
|
526
|
+
last_columns_count += 1
|
|
467
527
|
|
|
468
528
|
# See if this row should be merged with the last row
|
|
469
529
|
if last_build_stats and self.merge_test_fn(last_build_stats, build_stats):
|
|
470
530
|
# Merge with last row
|
|
471
|
-
for col in range(0,
|
|
531
|
+
for col in range(0, first_columns_count):
|
|
472
532
|
# If identical, don't duplicate
|
|
473
533
|
if last_row[col] != row[col]:
|
|
474
534
|
last_row[col] = _merge_join(last_row[col], row[col])
|
|
475
|
-
for col in range(
|
|
535
|
+
for col in range(first_columns_count, len(row) - last_columns_count):
|
|
476
536
|
# Allow duplicates
|
|
477
537
|
last_row[col] = _merge_join(last_row[col], row[col])
|
|
478
|
-
for col in range(len(row) -
|
|
538
|
+
for col in range(len(row) - last_columns_count, len(row)):
|
|
479
539
|
# If identical, don't duplicate
|
|
480
540
|
if last_row[col] != row[col]:
|
|
481
541
|
last_row[col] = _merge_join(last_row[col], row[col])
|
|
@@ -581,17 +641,25 @@ class LemonadePerfTable(Table):
|
|
|
581
641
|
Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
|
|
582
642
|
".2f",
|
|
583
643
|
),
|
|
644
|
+
StatWithSD(
|
|
645
|
+
_wrap("Prefill Tokens per Second", 8),
|
|
646
|
+
Keys.PREFILL_TOKENS_PER_SECOND,
|
|
647
|
+
Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
|
|
648
|
+
".2f",
|
|
649
|
+
),
|
|
584
650
|
StatWithSD(
|
|
585
651
|
_wrap("Tokens per Second", 8),
|
|
586
652
|
Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
|
|
587
653
|
Keys.STD_DEV_TOKENS_PER_SECOND,
|
|
588
654
|
".2f",
|
|
589
655
|
),
|
|
590
|
-
|
|
656
|
+
DependentStat(
|
|
591
657
|
_wrap("Total Generated Tokens", 9),
|
|
592
|
-
Keys.RESPONSE_TOKENS,
|
|
658
|
+
[Keys.RESPONSE_TOKENS, Keys.PROMPT_TOKENS],
|
|
593
659
|
"d",
|
|
594
|
-
stat_fn=
|
|
660
|
+
stat_fn=lambda x: _window_sum(
|
|
661
|
+
_to_list(x[0]), n_windows=len(_to_list(x[1]))
|
|
662
|
+
),
|
|
595
663
|
),
|
|
596
664
|
SimpleStat(
|
|
597
665
|
_wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import subprocess
|
|
4
|
+
import time
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from lemonade_server.pydantic_models import (
|
|
10
|
+
PullConfig,
|
|
11
|
+
ChatCompletionRequest,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from lemonade.tools.server.wrapped_server import WrappedServerTelemetry, WrappedServer
|
|
15
|
+
from lemonade.tools.flm.utils import install_flm, download_flm_model
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FlmTelemetry(WrappedServerTelemetry):
    """
    Manages telemetry data collection and display for FLM server.
    """

    def parse_telemetry_line(self, line: str):
        """
        Parse one line of FLM server output for telemetry data.

        As of FLM 0.9.10 the server CLI emits no telemetry, so this
        required override is intentionally a no-op placeholder.
        """
        return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FlmServer(WrappedServer):
    """
    Routes OpenAI API requests to an FLM server instance and returns the result
    back to Lemonade Server.
    """

    def __init__(self):
        # FLM model name of the currently-loaded model; populated in
        # _launch_server_subprocess and required by chat_completion.
        self.flm_model_name = None
        super().__init__(server_name="flm-server", telemetry=FlmTelemetry())

    def address(self):
        """Base URL of the wrapped FLM server's OpenAI-compatible API."""
        return f"http://localhost:{self.port}/v1"

    def install_server(self):
        """
        Check if FLM is installed and at minimum version.
        If not, download and run the GUI installer, then wait for completion.
        """
        install_flm()

    def download_model(
        self, config_checkpoint, config_mmproj=None, do_not_upgrade=False
    ) -> dict:
        """
        Download an FLM model checkpoint (and optional mmproj file).

        NOTE(review): the signature is annotated ``-> dict`` but the result
        of download_flm_model is not returned, so callers receive None —
        confirm whether any caller relies on the return value.
        """
        download_flm_model(config_checkpoint, config_mmproj, do_not_upgrade)

    def _launch_server_subprocess(
        self,
        model_config: PullConfig,
        snapshot_files: dict,
        ctx_size: int,
        supports_embeddings: bool = False,
        supports_reranking: bool = False,
    ):
        # Launch `flm serve` as a subprocess on a freshly chosen port.
        # snapshot_files, supports_embeddings, and supports_reranking are
        # accepted to satisfy the WrappedServer interface but are unused here.

        self._choose_port()

        # Keep track of the FLM model name so that we can use it later
        self.flm_model_name = model_config.checkpoint

        command = [
            "flm",
            "serve",
            f"{self.flm_model_name}",
            "--ctx-len",
            str(ctx_size),
            "--port",
            str(self.port),
        ]

        # Set up environment with library path for Linux
        env = os.environ.copy()

        # Line-buffered text mode so the logging thread can stream output
        # line by line; stderr is merged into stdout.
        self.process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            errors="replace",
            bufsize=1,
            env=env,
        )

        # Start background thread to log subprocess output
        threading.Thread(
            target=self._log_subprocess_output,
            args=("FLM SERVER",),
            daemon=True,
        ).start()

    def _wait_for_load(self):
        """
        FLM doesn't seem to have a health API, so we'll use the "list local models"
        API to check if the server is up.
        """
        # NOTE(review): process.poll() returns None while the child is running
        # and its exit code once it terminates; `not self.process.poll()` is
        # therefore also True for a clean exit code of 0, so a server that
        # exits successfully would keep this loop polling — confirm intended.
        status_code = None
        while not self.process.poll() and status_code != 200:
            health_url = f"http://localhost:{self.port}/api/tags"
            try:
                health_response = requests.get(health_url)
            except requests.exceptions.ConnectionError:
                # Server socket not open yet; retry after the sleep below.
                logging.debug(
                    "Not able to connect to %s yet, will retry", self.server_name
                )
            else:
                status_code = health_response.status_code
                logging.debug(
                    "Testing %s readiness (will retry until ready), result: %s",
                    self.server_name,
                    health_response.json(),
                )
            # Poll at most once per second.
            time.sleep(1)

    def chat_completion(self, chat_completion_request: ChatCompletionRequest):
        # FLM requires the correct model name to be in the request
        # (whereas llama-server ignores the model name field in the request)
        chat_completion_request.model = self.flm_model_name

        return super().chat_completion(chat_completion_request)
|