lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic. See the registry page for details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/tools/oga/migration.py
@@ -0,0 +1,403 @@
+ """
+ Migration utilities for handling RyzenAI version upgrades.
+
+ This module provides functionality to detect and clean up incompatible RyzenAI models
+ when upgrading between major versions (e.g., 1.4/1.5 -> 1.6).
+ """
+
+ import os
+ import json
+ import shutil
+ import logging
+ from typing import List, Dict, Optional, Tuple
+
+
+ def get_directory_size(path: str) -> int:
+     """
+     Calculate the total size of a directory in bytes.
+
+     Args:
+         path: Path to the directory
+
+     Returns:
+         Total size in bytes
+     """
+     total_size = 0
+     try:
+         for dirpath, _, filenames in os.walk(path):
+             for filename in filenames:
+                 filepath = os.path.join(dirpath, filename)
+                 try:
+                     total_size += os.path.getsize(filepath)
+                 except (OSError, FileNotFoundError):
+                     # Skip files that can't be accessed
+                     pass
+     except (OSError, FileNotFoundError):
+         pass
+     return total_size
+
+
+ def format_size(size_bytes: int) -> str:
+     """
+     Format byte size to human-readable string.
+
+     Args:
+         size_bytes: Size in bytes
+
+     Returns:
+         Formatted string (e.g., "1.5 GB", "450 MB")
+     """
+     for unit in ["B", "KB", "MB"]:
+         if size_bytes < 1024.0:
+             return f"{size_bytes:.1f} {unit}"
+         size_bytes /= 1024.0
+     return f"{size_bytes:.1f} GB"
+
+
+ def check_rai_config_version(model_path: str, required_version: str = "1.6.0") -> bool:
+     """
+     Check if a model's rai_config.json contains the required version.
+
+     Args:
+         model_path: Path to the model directory
+         required_version: Version string to check for (default: "1.6.0")
+
+     Returns:
+         True if model is compatible (has required version), False otherwise
+     """
+     rai_config_path = os.path.join(model_path, "rai_config.json")
+
+     # If no rai_config.json exists, it's not a RyzenAI model
+     if not os.path.exists(rai_config_path):
+         return True
+
+     try:
+         with open(rai_config_path, "r", encoding="utf-8") as f:
+             config = json.load(f)
+
+         # Check if max_prompt_length exists and has the required version
+         if "max_prompt_length" in config:
+             max_prompt_length = config["max_prompt_length"]
+             if isinstance(max_prompt_length, dict):
+                 # If it's a dict with version keys, check for required version
+                 return required_version in max_prompt_length
+             # Fallback to True to avoid deleting models if format changes
+             return True
+
+         return True
+
+     except (json.JSONDecodeError, OSError) as e:
+         logging.warning(f"Could not read rai_config.json from {model_path}: {e}")
+         # If we can't read it, assume it's compatible to avoid false positives
+         return True
+
+
+ def scan_oga_models_cache(cache_dir: str) -> List[Dict[str, any]]:
+     """
+     Scan the Lemonade OGA models cache for incompatible models.
+
+     Args:
+         cache_dir: Path to the Lemonade cache directory
+
+     Returns:
+         List of dicts with model info (path, name, size, compatible)
+     """
+     oga_models_path = os.path.join(cache_dir, "oga_models")
+     incompatible_models = []
+
+     if not os.path.exists(oga_models_path):
+         return incompatible_models
+
+     try:
+         # Iterate through model directories in oga_models
+         for model_name in os.listdir(oga_models_path):
+             model_dir = os.path.join(oga_models_path, model_name)
+
+             if not os.path.isdir(model_dir):
+                 continue
+
+             # Check all subdirectories (e.g., npu-int4, hybrid-int4)
+             for subdir in os.listdir(model_dir):
+                 subdir_path = os.path.join(model_dir, subdir)
+
+                 if not os.path.isdir(subdir_path):
+                     continue
+
+                 # Check if this model version is compatible
+                 if not check_rai_config_version(subdir_path):
+                     size = get_directory_size(subdir_path)
+                     incompatible_models.append(
+                         {
+                             "path": subdir_path,
+                             "name": f"{model_name}/{subdir}",
+                             "size": size,
+                             "size_formatted": format_size(size),
+                             "cache_type": "lemonade",
+                         }
+                     )
+
+     except (OSError, PermissionError) as e:
+         logging.warning(f"Error scanning oga_models cache: {e}")
+
+     return incompatible_models
+
+
+ def scan_huggingface_cache(hf_home: Optional[str] = None) -> List[Dict[str, any]]:
+     """
+     Scan the HuggingFace cache for incompatible RyzenAI models.
+
+     Args:
+         hf_home: Path to HuggingFace home directory (default: from env or ~/.cache/huggingface)
+
+     Returns:
+         List of dicts with model info (path, name, size, compatible)
+     """
+     if hf_home is None:
+         hf_home = os.environ.get(
+             "HF_HOME", os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
+         )
+
+     hub_path = os.path.join(hf_home, "hub")
+     incompatible_models = []
+
+     if not os.path.exists(hub_path):
+         return incompatible_models
+
+     try:
+         # Iterate through model directories in HuggingFace cache
+         for item in os.listdir(hub_path):
+             if not item.startswith("models--"):
+                 continue
+
+             model_dir = os.path.join(hub_path, item)
+             if not os.path.isdir(model_dir):
+                 continue
+
+             # Look in snapshots subdirectory
+             snapshots_dir = os.path.join(model_dir, "snapshots")
+             if not os.path.exists(snapshots_dir):
+                 continue
+
+             # Check each snapshot
+             for snapshot_hash in os.listdir(snapshots_dir):
+                 snapshot_path = os.path.join(snapshots_dir, snapshot_hash)
+
+                 if not os.path.isdir(snapshot_path):
+                     continue
+
+                 # Check if this snapshot has incompatible RyzenAI model
+                 if not check_rai_config_version(snapshot_path):
+                     # Extract readable model name from directory
+                     model_name = item.replace("models--", "").replace("--", "/")
+                     size = get_directory_size(
+                         model_dir
+                     )  # Size of entire model directory
+                     incompatible_models.append(
+                         {
+                             "path": model_dir,
+                             "name": model_name,
+                             "size": size,
+                             "size_formatted": format_size(size),
+                             "cache_type": "huggingface",
+                         }
+                     )
+                     break
+
+     except (OSError, PermissionError) as e:
+         logging.warning(f"Error scanning HuggingFace cache: {e}")
+
+     return incompatible_models
+
+
+ def detect_incompatible_ryzenai_models(
+     cache_dir: str, hf_home: Optional[str] = None
+ ) -> Tuple[List[Dict[str, any]], int]:
+     """
+     Detect all incompatible RyzenAI models in both Lemonade and HuggingFace caches.
+
+     Args:
+         cache_dir: Path to the Lemonade cache directory
+         hf_home: Path to HuggingFace home directory (optional)
+
+     Returns:
+         Tuple of (list of incompatible models, total size in bytes)
+     """
+     incompatible_models = []
+
+     # Scan Lemonade cache
+     oga_models = scan_oga_models_cache(cache_dir)
+     incompatible_models.extend(oga_models)
+
+     # Scan HuggingFace cache
+     hf_models = scan_huggingface_cache(hf_home)
+     incompatible_models.extend(hf_models)
+
+     # Calculate total size
+     total_size = sum(model["size"] for model in incompatible_models)
+
+     logging.info(
+         f"Found {len(incompatible_models)} incompatible RyzenAI models "
+         f"({format_size(total_size)} total)"
+     )
+
+     return incompatible_models, total_size
+
+
+ def delete_model_directory(model_path: str) -> bool:
+     """
+     Safely delete a model directory.
+
+     Args:
+         model_path: Path to the model directory to delete
+
+     Returns:
+         True if deletion successful, False otherwise
+     """
+     try:
+         if os.path.exists(model_path):
+             shutil.rmtree(model_path)
+             logging.info(f"Deleted model directory: {model_path}")
+             return True
+         else:
+             logging.warning(f"Model directory not found: {model_path}")
+             return False
+     except (OSError, PermissionError) as e:
+         logging.error(f"Failed to delete model directory {model_path}: {e}")
+         return False
+
+
+ def _extract_checkpoint_from_path(path: str) -> Optional[str]:
+     """
+     Extract the checkpoint name from a model path.
+
+     Args:
+         path: Model directory path (either Lemonade cache or HuggingFace cache)
+
+     Returns:
+         Checkpoint name (e.g., "amd/Qwen2.5-1.5B-Instruct-awq") or None if not extractable
+     """
+     # Normalize path separators to handle both Unix and Windows paths
+     normalized_path = path.replace("\\", "/")
+     parts = normalized_path.split("/")
+
+     # Handle HuggingFace cache paths: models--{org}--{repo}
+     if "models--" in normalized_path:
+         for part in parts:
+             if part.startswith("models--"):
+                 # Convert models--org--repo to org/repo
+                 # Replace first two occurrences of -- with /
+                 checkpoint = part.replace("models--", "", 1).replace("--", "/", 1)
+                 return checkpoint
+         return None
+
+     # Handle Lemonade cache paths: oga_models/{model_name}/{device}-{dtype}
+     if "oga_models" in normalized_path:
+         try:
+             oga_models_idx = parts.index("oga_models")
+             if oga_models_idx + 1 < len(parts):
+                 model_name = parts[oga_models_idx + 1]
+                 # Convert model_name back to checkpoint (e.g., amd_model -> amd/model)
+                 # This is a heuristic - we look for the pattern {org}_{model}
+                 checkpoint = model_name.replace("_", "/", 1)
+                 return checkpoint
+         except (ValueError, IndexError):
+             return None
+
+     return None
+
+
+ def _cleanup_user_models_json(deleted_checkpoints: List[str], user_models_file: str):
+     """
+     Remove entries from user_models.json for models that have been deleted.
+
+     Args:
+         deleted_checkpoints: List of checkpoint names that were deleted
+         user_models_file: Path to user_models.json
+     """
+     if not deleted_checkpoints or not os.path.exists(user_models_file):
+         return
+
+     try:
+         with open(user_models_file, "r", encoding="utf-8") as f:
+             user_models = json.load(f)
+
+         # Track which models to remove
+         models_to_remove = []
+         for model_name, model_info in user_models.items():
+             checkpoint = model_info.get("checkpoint", "")
+             # Check if this checkpoint matches any deleted checkpoints
+             # We do a case-insensitive comparison since paths may have been lowercased
+             for deleted_checkpoint in deleted_checkpoints:
+                 if checkpoint.lower() == deleted_checkpoint.lower():
+                     models_to_remove.append(model_name)
+                     break
+
+         # Remove the models
+         for model_name in models_to_remove:
+             del user_models[model_name]
+             logging.info(f"Removed {model_name} from user_models.json")
+
+         # Save the updated file only if we removed something
+         if models_to_remove:
+             with open(user_models_file, "w", encoding="utf-8") as f:
+                 json.dump(user_models, f, indent=2)
+             logging.info(
+                 f"Updated user_models.json - removed {len(models_to_remove)} entries"
+             )
+
+     except (json.JSONDecodeError, OSError) as e:
+         logging.warning(f"Could not update user_models.json: {e}")
+
+
+ def delete_incompatible_models(
+     model_paths: List[str], user_models_file: Optional[str] = None
+ ) -> Dict[str, any]:
+     """
+     Delete multiple incompatible model directories and clean up user_models.json.
+
+     Args:
+         model_paths: List of paths to delete
+         user_models_file: Path to user_models.json (optional, will use default if not provided)
+
+     Returns:
+         Dict with deletion results (success_count, failed_count, freed_size, cleaned_user_models)
+     """
+     success_count = 0
+     failed_count = 0
+     freed_size = 0
+     deleted_checkpoints = []
+
+     for path in model_paths:
+         # Calculate size before deletion
+         size = get_directory_size(path)
+
+         # Extract checkpoint name before deleting
+         checkpoint = _extract_checkpoint_from_path(path)
+         if checkpoint:
+             deleted_checkpoints.append(checkpoint)
+
+         if delete_model_directory(path):
+             success_count += 1
+             freed_size += size
+         else:
+             failed_count += 1
+
+     # Clean up user_models.json if we deleted any models
+     cleaned_user_models = False
+     if deleted_checkpoints:
+         # Use default path if not provided
+         if user_models_file is None:
+             from lemonade.cache import DEFAULT_CACHE_DIR
+
+             user_models_file = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
+
+         _cleanup_user_models_json(deleted_checkpoints, user_models_file)
+         cleaned_user_models = True
+
+     return {
+         "success_count": success_count,
+         "failed_count": failed_count,
+         "freed_size": freed_size,
+         "freed_size_formatted": format_size(freed_size),
+         "cleaned_user_models": cleaned_user_models,
+     }
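
The new module exposes a detect-then-delete flow. A minimal usage sketch (not part of the diff; it assumes the package's default caches and imports the helpers exactly as defined above):

from lemonade.cache import DEFAULT_CACHE_DIR
from lemonade.tools.oga.migration import (
    detect_incompatible_ryzenai_models,
    delete_incompatible_models,
)

# List RyzenAI models whose rai_config.json lacks a "1.6.0" entry
models, total_size = detect_incompatible_ryzenai_models(DEFAULT_CACHE_DIR)
for model in models:
    print(f"{model['name']}: {model['size_formatted']} ({model['cache_type']} cache)")

# Reclaim the disk space and prune matching user_models.json entries
if models:
    result = delete_incompatible_models([m["path"] for m in models])
    print(
        f"Deleted {result['success_count']} model(s), "
        f"freed {result['freed_size_formatted']}"
    )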
lemonade/tools/report/table.py
@@ -48,6 +48,18 @@ def _merge_join(str1, str2) -> str:
      return str1 + ("\n" if str1 and str2 else "") + str2


+ def _window_sum(data: list, n_windows: int) -> list:
+     """Sums data into n_windows windows"""
+     if n_windows <= 0:
+         return data
+     window_size = max(1, len(data) // n_windows)
+     summed_data = []
+     for i in range(0, len(data), window_size):
+         window_sum = sum(data[i : i + window_size])
+         summed_data.append(window_sum)
+     return summed_data
+
+
  ################################################################################
  # CLASSES THAT DESCRIBE TEXT TABLE COLUMNS
  ################################################################################
@@ -88,10 +100,54 @@ class SimpleStat(TableColumn):
          if lean and self.omit_if_lean:
              return None
          data = build_stats.get(self.stat, None)
-         if data is None:
-             return ""
+         if data is None or (data == []):
+             return "-"
          if self.stat_fn:
              data = self.stat_fn(data)
+         cell_str = "\n".join(
+             [
+                 _wrap("-" if x is None else f"{x:{self.format_str}}", self.wrap)
+                 for x in _to_list(data)
+             ]
+         )
+         return cell_str
+
+
+ class DependentStat(TableColumn):
+     """
+     These are for statistics already declared by the tool or basic build stats that
+     rely on one or more additional stats to compute their value. The dependency is
+     embodied by the stat_fn function.
+     """
+
+     def __init__(
+         self,
+         column_header,
+         stats,
+         format_str,
+         align="center",
+         omit_if_lean=False,
+         wrap=None,
+         stat_fn=None,
+     ):
+         self.column_header = column_header
+         self.stats = stats
+         self.format_str = format_str
+         self.align = align
+         self.omit_if_lean = omit_if_lean
+         self.wrap = wrap or self.default_wrap
+         self.stat_fn = stat_fn
+
+     def get_str(self, build_stats, lean=False):
+         if lean and self.omit_if_lean:
+             return None
+         stats_data = [build_stats.get(stat, None) for stat in self.stats]
+         if self.stat_fn:
+             data = self.stat_fn(stats_data)
+         else:
+             data = stats_data[0]
+         if data is None or (data == []):
+             return "-"
          cell_str = "\n".join(
              [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
          )
@@ -434,10 +490,12 @@ class Table(ABC):
          row = []

          # First columns
+         first_columns_count = 0
          for entry in first_columns:
              entry_str = entry.get_str(build_stats, self.lean)
              if entry_str is not None:
                  row.append(entry_str)
+                 first_columns_count += 1

          # Per tool columns
          for tool in tools:
@@ -460,22 +518,24 @@ class Table(ABC):
              row.append(entry_str)

          # Final columns
+         last_columns_count = 0
          for entry in last_columns:
              entry_str = entry.get_str(build_stats, self.lean)
              if entry_str is not None:
                  row.append(entry_str)
+                 last_columns_count += 1

          # See if this row should be merged with the last row
          if last_build_stats and self.merge_test_fn(last_build_stats, build_stats):
              # Merge with last row
-             for col in range(0, len(first_columns)):
+             for col in range(0, first_columns_count):
                  # If identical, don't duplicate
                  if last_row[col] != row[col]:
                      last_row[col] = _merge_join(last_row[col], row[col])
-             for col in range(len(first_columns), len(row) - len(last_columns)):
+             for col in range(first_columns_count, len(row) - last_columns_count):
                  # Allow duplicates
                  last_row[col] = _merge_join(last_row[col], row[col])
-             for col in range(len(row) - len(last_columns), len(row)):
+             for col in range(len(row) - last_columns_count, len(row)):
                  # If identical, don't duplicate
                  if last_row[col] != row[col]:
                      last_row[col] = _merge_join(last_row[col], row[col])
@@ -581,17 +641,25 @@ class LemonadePerfTable(Table):
              Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
              ".2f",
          ),
+         StatWithSD(
+             _wrap("Prefill Tokens per Second", 8),
+             Keys.PREFILL_TOKENS_PER_SECOND,
+             Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+             ".2f",
+         ),
          StatWithSD(
              _wrap("Tokens per Second", 8),
              Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
              Keys.STD_DEV_TOKENS_PER_SECOND,
              ".2f",
          ),
-         SimpleStat(
+         DependentStat(
              _wrap("Total Generated Tokens", 9),
-             Keys.RESPONSE_TOKENS,
+             [Keys.RESPONSE_TOKENS, Keys.PROMPT_TOKENS],
              "d",
-             stat_fn=sum,
+             stat_fn=lambda x: _window_sum(
+                 _to_list(x[0]), n_windows=len(_to_list(x[1]))
+             ),
          ),
          SimpleStat(
              _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
lemonade/tools/server/flm.py
@@ -0,0 +1,133 @@
+ import os
+ import logging
+ import subprocess
+ import time
+ import threading
+
+ import requests
+
+ from lemonade_server.pydantic_models import (
+     PullConfig,
+     ChatCompletionRequest,
+ )
+
+ from lemonade.tools.server.wrapped_server import WrappedServerTelemetry, WrappedServer
+ from lemonade.tools.flm.utils import install_flm, download_flm_model
+
+
+ class FlmTelemetry(WrappedServerTelemetry):
+     """
+     Manages telemetry data collection and display for FLM server.
+     """
+
+     def parse_telemetry_line(self, line: str):
+         """
+         Parse telemetry data from FLM server output lines.
+
+         Note: as of FLM 0.9.10, no telemetry data is provided by the server CLI.
+         This function is required to be implemented, so we leave it empty
+         as a placeholder for now.
+         """
+
+         return
+
+
+ class FlmServer(WrappedServer):
+     """
+     Routes OpenAI API requests to an FLM server instance and returns the result
+     back to Lemonade Server.
+     """
+
+     def __init__(self):
+         self.flm_model_name = None
+         super().__init__(server_name="flm-server", telemetry=FlmTelemetry())
+
+     def address(self):
+         return f"http://localhost:{self.port}/v1"
+
+     def install_server(self):
+         """
+         Check if FLM is installed and at minimum version.
+         If not, download and run the GUI installer, then wait for completion.
+         """
+         install_flm()
+
+     def download_model(
+         self, config_checkpoint, config_mmproj=None, do_not_upgrade=False
+     ) -> dict:
+         download_flm_model(config_checkpoint, config_mmproj, do_not_upgrade)
+
+     def _launch_server_subprocess(
+         self,
+         model_config: PullConfig,
+         snapshot_files: dict,
+         ctx_size: int,
+         supports_embeddings: bool = False,
+         supports_reranking: bool = False,
+     ):
+
+         self._choose_port()
+
+         # Keep track of the FLM model name so that we can use it later
+         self.flm_model_name = model_config.checkpoint
+
+         command = [
+             "flm",
+             "serve",
+             f"{self.flm_model_name}",
+             "--ctx-len",
+             str(ctx_size),
+             "--port",
+             str(self.port),
+         ]
+
+         # Set up environment with library path for Linux
+         env = os.environ.copy()
+
+         self.process = subprocess.Popen(
+             command,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.STDOUT,
+             text=True,
+             encoding="utf-8",
+             errors="replace",
+             bufsize=1,
+             env=env,
+         )
+
+         # Start background thread to log subprocess output
+         threading.Thread(
+             target=self._log_subprocess_output,
+             args=("FLM SERVER",),
+             daemon=True,
+         ).start()
+
+     def _wait_for_load(self):
+         """
+         FLM doesn't seem to have a health API, so we'll use the "list local models"
+         API to check if the server is up.
+         """
+         status_code = None
+         while not self.process.poll() and status_code != 200:
+             health_url = f"http://localhost:{self.port}/api/tags"
+             try:
+                 health_response = requests.get(health_url)
+             except requests.exceptions.ConnectionError:
+                 logging.debug(
+                     "Not able to connect to %s yet, will retry", self.server_name
+                 )
+             else:
+                 status_code = health_response.status_code
+                 logging.debug(
+                     "Testing %s readiness (will retry until ready), result: %s",
+                     self.server_name,
+                     health_response.json(),
+                 )
+             time.sleep(1)
+
+     def chat_completion(self, chat_completion_request: ChatCompletionRequest):
+         # FLM requires the correct model name to be in the request
+         # (whereas llama-server ignores the model name field in the request)
+         chat_completion_request.model = self.flm_model_name
+
+         return super().chat_completion(chat_completion_request)
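
FlmServer's readiness probe and request routing reduce to plain HTTP against the wrapped process. A minimal sketch of that flow (not part of the diff; it assumes an FLM server was started with `flm serve <model> --port 8000` as in the command above, the model name below is a placeholder, and requests are sent to the standard OpenAI chat-completions path under the /v1 base address the class proxies):

import time
import requests

PORT = 8000
MODEL = "your-flm-model"  # placeholder; FLM needs the real model name in the request

# Poll the "list local models" endpoint until the server answers, as _wait_for_load does
while True:
    try:
        if requests.get(f"http://localhost:{PORT}/api/tags").status_code == 200:
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(1)

# Then send an OpenAI-style request to the /v1 base address
response = requests.post(
    f"http://localhost:{PORT}/v1/chat/completions",
    json={"model": MODEL, "messages": [{"role": "user", "content": "Hello!"}]},
)
print(response.json())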