lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lemonade-sdk might be problematic.
Files changed (38)
  1. lemonade/cache.py +6 -1
  2. lemonade/common/status.py +4 -4
  3. lemonade/common/system_info.py +0 -26
  4. lemonade/tools/accuracy.py +143 -48
  5. lemonade/tools/adapter.py +6 -1
  6. lemonade/tools/bench.py +26 -8
  7. lemonade/tools/flm/utils.py +70 -22
  8. lemonade/tools/huggingface/bench.py +6 -1
  9. lemonade/tools/llamacpp/bench.py +146 -27
  10. lemonade/tools/llamacpp/load.py +30 -2
  11. lemonade/tools/llamacpp/utils.py +317 -21
  12. lemonade/tools/oga/bench.py +5 -26
  13. lemonade/tools/oga/load.py +49 -123
  14. lemonade/tools/oga/migration.py +403 -0
  15. lemonade/tools/report/table.py +76 -8
  16. lemonade/tools/server/flm.py +2 -6
  17. lemonade/tools/server/llamacpp.py +43 -2
  18. lemonade/tools/server/serve.py +354 -18
  19. lemonade/tools/server/static/js/chat.js +15 -77
  20. lemonade/tools/server/static/js/model-settings.js +24 -3
  21. lemonade/tools/server/static/js/models.js +440 -37
  22. lemonade/tools/server/static/js/shared.js +61 -8
  23. lemonade/tools/server/static/logs.html +157 -13
  24. lemonade/tools/server/static/styles.css +204 -0
  25. lemonade/tools/server/static/webapp.html +39 -1
  26. lemonade/version.py +1 -1
  27. lemonade_install/install.py +33 -579
  28. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +6 -4
  29. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/RECORD +38 -37
  30. lemonade_server/cli.py +10 -0
  31. lemonade_server/model_manager.py +172 -11
  32. lemonade_server/pydantic_models.py +3 -0
  33. lemonade_server/server_models.json +102 -66
  34. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  35. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  36. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  37. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  38. {lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/cache.py CHANGED
@@ -43,7 +43,11 @@ def build_name(input_name):
     """
 
     if os.path.isdir(input_name):
+        # Input is a folder so no good way to determine a model name
         input_name_sanitized = "local_model"
+    elif os.path.isfile(input_name):
+        # Use the filename without its extension
+        input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
     else:
         # Sanitize the input name
         input_name_sanitized = input_name.replace("/", "_")
@@ -63,8 +67,9 @@ class Keys:
     TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
     STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
     SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
-    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
     STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
+    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
+    STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
     CHECKPOINT = "checkpoint"
     DTYPE = "dtype"
     PROMPT = "prompt"
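For reference, a minimal sketch (not lemonade code) of how the new file branch in build_name behaves, assuming standard os.path semantics; the example path is made up:

import os

def build_name_sketch(input_name):
    # Mirrors the branching added above: folders map to a generic name,
    # files map to their basename without the extension,
    # everything else is sanitized as before.
    if os.path.isdir(input_name):
        return "local_model"
    elif os.path.isfile(input_name):
        return os.path.splitext(os.path.basename(input_name))[0]
    return input_name.replace("/", "_")

# A hypothetical "models/qwen-0.5b-q4.gguf" on disk would now yield "qwen-0.5b-q4".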
lemonade/common/status.py CHANGED
@@ -112,10 +112,10 @@ class UniqueInvocationInfo(BasicInfo):
         if print_file_name:
             print(f"{self.script_name}{self.extension}:")
 
-        # Print invocation about the model (only applies to scripts, not ONNX files or
+        # Print invocation about the model (only applies to scripts, not ONNX or GGUF files, nor
         # LLMs, which have no extension)
         if not (
-            self.extension == ".onnx"
+            self.extension in [".onnx", ".gguf"]
             or self.extension == build.state_file_name
             or self.extension == ""
         ):
@@ -138,7 +138,7 @@ class UniqueInvocationInfo(BasicInfo):
 
         if self.depth == 0:
             print(f"{self.indent}\tLocation:\t{self.file}", end="")
-            if self.extension == ".onnx":
+            if self.extension in [".onnx", ".gguf"]:
                 print()
             else:
                 print(f", line {self.line}")
@@ -314,7 +314,7 @@ class UniqueInvocationInfo(BasicInfo):
         Print information about a given model or submodel.
         """
 
-        if self.extension == ".onnx" or self.extension == "":
+        if self.extension in [".onnx", ".gguf"] or self.extension == "":
            self.indent = "\t" * (2 * self.depth)
        else:
            self.indent = "\t" * (2 * self.depth + 1)
lemonade/common/system_info.py CHANGED
@@ -1110,32 +1110,6 @@ class LinuxSystemInfo(SystemInfo):
 
         return ""
 
-    def _get_nvidia_vram_smi_linux(self) -> float:
-        """
-        Get NVIDIA GPU VRAM on Linux using nvidia-smi command.
-
-        Returns:
-            float: VRAM in GB, or 0.0 if detection fails
-        """
-        try:
-            output = (
-                subprocess.check_output(
-                    "nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits",
-                    shell=True,
-                    stderr=subprocess.DEVNULL,
-                )
-                .decode()
-                .strip()
-            )
-
-            # nvidia-smi returns memory in MB
-            vram_mb = int(output.split("\n")[0])
-            vram_gb = round(vram_mb / 1024, 1)
-            return vram_gb
-        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
-            pass
-        return 0.0
-
     @staticmethod
     def get_processor_name() -> str:
         """
lemonade/tools/accuracy.py CHANGED
@@ -83,42 +83,116 @@ class LMEvalHarness(Tool):
 
         return parser
 
-    def _process_results(self, results_dir, state):
-        """Process evaluation results and save to state stats"""
-        if not os.path.exists(results_dir) or not os.path.isdir(results_dir):
-            printing.log_warning(f"Results directory not found at {results_dir}")
-            return
-
-        model_dirs = [
-            d
-            for d in os.listdir(results_dir)
-            if os.path.isdir(os.path.join(results_dir, d))
-        ]
-
-        if not model_dirs:
-            printing.log_warning(f"No model directories found in {results_dir}")
-            return
-
-        model_dir = os.path.join(results_dir, model_dirs[0])
-        printing.log_info(f"Found model directory: {model_dir}")
-
-        # Find the results JSON file with timestamp
-        results_files = [
-            f
-            for f in os.listdir(model_dir)
-            if f.startswith("results_") and f.endswith(".json")
-        ]
+    def _scale_metric(self, metric_name, value):
+        """
+        Scale metric value appropriately based on type and range
+
+        Args:
+            metric_name: Name of the metric (e.g., "acc,none", "ppl")
+            value: Numeric value of the metric
+
+        Returns:
+            tuple: (scaled_value, units, display_string)
+        """
+        fraction_metrics = {
+            "acc",
+            "accuracy",
+            "f1",
+            "exact_match",
+            "em",
+            "win_rate",
+            "recall",
+            "precision",
+            "rouge",
+            "bleu",
+            "meteor",
+            "bertscore",
+            "match",
+            "correct",
+            "pass",
+            "success_rate",
+        }
+
+        metric_base = metric_name.split(",")[0].lower()
+        is_fraction = any(
+            frac_metric in metric_base for frac_metric in fraction_metrics
+        )
+        is_in_unit_range = 0 <= value <= 1
 
-        if not results_files:
-            printing.log_warning(f"No results files found in {model_dir}")
+        if is_fraction and is_in_unit_range:
+            scaled_value = float(value) * 100
+            units = "%"
+            display_str = f"{value:.4f} ({scaled_value:.2f}%)"
+        else:
+            scaled_value = float(value)
+            units = "raw"
+            display_str = f"{value:.4f}"
+
+        return scaled_value, units, display_str
+
+    def _process_results(self, results_path, state):
+        """
+        Process evaluation results and save to state stats
+
+        Args:
+            results_path: Can be either a direct JSON file path or a directory path
+            state: State object to save metrics to
+        """
+        results_file_path = None
+
+        # Determine if this is a file or directory and find the JSON file
+        if os.path.isfile(results_path) and results_path.endswith(".json"):
+            # Direct JSON file path (modern format)
+            results_file_path = results_path
+        elif os.path.isdir(results_path):
+            # Look for model subdirectories
+            model_dirs = [
+                d
+                for d in os.listdir(results_path)
+                if os.path.isdir(os.path.join(results_path, d))
+            ]
+
+            if model_dirs:
+                # Format: results_dir/model_name/results_*.json
+                model_dir = os.path.join(results_path, model_dirs[0])
+                printing.log_info(f"Found model directory: {model_dir}")
+
+                results_files = [
+                    f
+                    for f in os.listdir(model_dir)
+                    if f.startswith("results_") and f.endswith(".json")
+                ]
+
+                if results_files:
+                    results_files.sort(reverse=True)
+                    results_file_path = os.path.join(model_dir, results_files[0])
+                else:
+                    printing.log_warning(f"No results files found in {model_dir}")
+                    return
+            else:
+                printing.log_warning(f"No model directories found in {results_path}")
+                return
+        else:
+            # Handle case where lm-eval adds timestamp to expected filename
+            results_dir = os.path.dirname(results_path)
+            if os.path.exists(results_dir):
+                json_files = [f for f in os.listdir(results_dir) if f.endswith(".json")]
+                if json_files:
+                    results_file_path = os.path.join(results_dir, json_files[0])
+                    printing.log_info(f"Found results file: {results_file_path}")
+                else:
+                    printing.log_warning(f"No JSON results file found in {results_dir}")
+                    return
+            else:
+                printing.log_warning(f"Results path not found at {results_path}")
+                return
+
+        if not results_file_path or not os.path.exists(results_file_path):
+            printing.log_warning(f"Results file not found at {results_file_path}")
             return
 
-        # Sort by timestamp
-        results_files.sort(reverse=True)
-        results_file_path = os.path.join(model_dir, results_files[0])
         printing.log_info(f"Processing results from {results_file_path}")
 
-        # Read and process results
         try:
             with open(results_file_path, "r", encoding="utf-8") as f:
                 results = json.load(f)
@@ -132,18 +206,21 @@ class LMEvalHarness(Tool):
                        if isinstance(value, (int, float)) and not metric.startswith(
                            "alias"
                        ):
-                            # Format metric name for stats
-                            clean_metric = metric.replace(",", "_")
+                            # Format metric name for stats - remove ,none suffix
+                            clean_metric = metric.split(",")[0]  # Remove ,none suffix
                            stat_name = f"lm_eval_{task_name}_{clean_metric}"
 
-                            # Save to state stats as percentage
-                            state.save_stat(stat_name, float(value) * 100)
-                            state.save_stat(f"{stat_name}_units", "%")
+                            # Scale metric appropriately
+                            scaled_value, units, value_str = self._scale_metric(
+                                metric, value
+                            )
+                            display_str = f" {metric}: {value_str}"
+
+                            state.save_stat(stat_name, scaled_value)
+                            state.save_stat(f"{stat_name}_units", units)
                            self.status_stats.append(stat_name)
 
-                            printing.log_info(
-                                f" {metric}: {value:.4f} ({value*100:.2f}%)"
-                            )
+                            printing.log_info(display_str)
 
            # Save summary metrics if available
            avg_metrics = {}
@@ -167,12 +244,17 @@ class LMEvalHarness(Tool):
                if values:
                    avg_value = sum(values) / len(values)
                    stat_name = f"lm_eval_average_{metric}"
-                    state.save_stat(stat_name, float(avg_value) * 100)
-                    state.save_stat(f"{stat_name}_units", "%")
-                    self.status_stats.append(stat_name)
-                    printing.log_info(
-                        f"Average {metric}: {avg_value:.4f} ({avg_value*100:.2f}%)"
+
+                    # Apply same scaling logic as individual metrics
+                    scaled_avg, units, value_str = self._scale_metric(
+                        metric, avg_value
                    )
+                    display_str = f"Average {metric}: {value_str}"
+
+                    state.save_stat(stat_name, scaled_avg)
+                    state.save_stat(f"{stat_name}_units", units)
+                    self.status_stats.append(stat_name)
+                    printing.log_info(display_str)
 
        except (IOError, json.JSONDecodeError) as e:
            printing.log_error(f"Error processing results: {e}")
@@ -189,6 +271,20 @@ class LMEvalHarness(Tool):
        output_path: Optional[str] = None,
    ) -> State:
 
+        # Check if lm-eval is available
+        try:
+            # pylint: disable=unused-import
+            import lm_eval
+        except ImportError:
+            error_msg = (
+                "lm-eval-harness is required but not installed. "
+                "Please install it using one of the following commands:\n"
+                " pip install lemonade-sdk[dev]\n"
+                " pip install -e .[dev]\n"
+            )
+            printing.log_error(error_msg)
+            raise ImportError(error_msg)
+
        import requests
        from lemonade.tools.server.utils.thread import ServerRunner
 
@@ -261,7 +357,7 @@ class LMEvalHarness(Tool):
            raise RuntimeError("Failed to start the server")
 
        # Build API URL
-        results_file = os.path.join(output_path, f"{task}_results")
+        results_file = os.path.join(output_path, f"{task}_results.json")
 
        printing.log_info(f"Running lm-eval-harness on {task}...")
 
@@ -312,9 +408,8 @@ class LMEvalHarness(Tool):
                "Results obtained successfully but couldn't display due to encoding issues"
            )
 
-            # Process results from the correct location
-            results_dir = os.path.join(output_path, f"{task}_results")
-            self._process_results(results_dir, state)
+            # Process results from the JSON file
+            self._process_results(results_file, state)
 
        except subprocess.CalledProcessError as e:
            printing.log_error(f"Error running lm-eval-harness: {e}")
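To illustrate the new scaling rule in _scale_metric, a simplified sketch with made-up values (the fraction_metrics set below is abbreviated from the full list in the diff, and the substring match mirrors the real logic only loosely):

fraction_metrics = {"acc", "exact_match", "f1"}
for name, value in {"acc,none": 0.7312, "ppl": 7.94}.items():
    base = name.split(",")[0].lower()
    if any(m in base for m in fraction_metrics) and 0 <= value <= 1:
        print(name, "->", value * 100, "%")   # fraction-style metrics become percentages
    else:
        print(name, "->", value, "raw")       # e.g. perplexity stays unscaled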
lemonade/tools/adapter.py CHANGED
@@ -10,11 +10,14 @@ class ModelAdapter(abc.ABC):
         """
         Self-benchmarking ModelAdapters can store their results in the
         tokens_per_second and time_to_first_token members.
+        ModelAdapters that run generate in a different process can store the
+        peak memory used (bytes) by that process in the peak_wset member.
         """
         self.tokens_per_second = None
         self.time_to_first_token = None
         self.prompt_tokens = None
         self.response_tokens = None
+        self.peak_wset = None
 
         self.type = "generic"
 
@@ -27,7 +30,9 @@ class ModelAdapter(abc.ABC):
         with recipe components, which themselves may not support a lot of arguments.
 
         The generate method should store prompt and response lengths (in tokens)
-        in the prompt_tokens and response_tokens members.
+        in the prompt_tokens and response_tokens members. If a different process is used,
+        the generate method can also store the peak memory used by that process in the
+        peak_wset member.
         """
 
 
lemonade/tools/bench.py CHANGED
@@ -2,7 +2,6 @@ from abc import ABC, abstractmethod
 import argparse
 import os
 import platform
-import psutil
 from lemonade.state import State
 from lemonade.tools import Tool
 from lemonade.cache import Keys
@@ -29,7 +28,9 @@ class Bench(Tool, ABC):
         Keys.SECONDS_TO_FIRST_TOKEN,
         Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
         Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+        Keys.STD_DEV_TOKENS_PER_SECOND,
         Keys.PREFILL_TOKENS_PER_SECOND,
+        Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
         Keys.PROMPT_TOKENS,
         Keys.RESPONSE_TOKENS,
         Keys.MAX_MEMORY_USED_GBYTE,
@@ -42,7 +43,9 @@ class Bench(Tool, ABC):
         self.mean_time_to_first_token_list = []
         self.std_dev_time_to_first_token_list = []
         self.prefill_tokens_per_second_list = []
+        self.std_dev_prefill_tokens_per_second_list = []
         self.token_generation_tokens_per_second_list = []
+        self.std_dev_token_generation_tokens_per_second_list = []
         self.max_memory_used_gb_list = []
 
         # Max memory used can only be measured on Windows systems
@@ -88,7 +91,7 @@ class Bench(Tool, ABC):
            default=[str(default_prompt_length)],
            metavar="PROMPT",
            help="Input one or more prompts to the LLM. Three formats are supported. "
-            "1) integer: use a synthetic prompt with the specified length "
+            "1) integer: use a synthetic prompt with the specified token length "
            "2) str: use a user-provided prompt string "
            "3) path/to/prompt.txt: load the prompt from a text file. "
            f"(default: {default_prompt_length}) ",
@@ -190,11 +193,6 @@ class Bench(Tool, ABC):
            )
            self.first_run_prompt = False
 
-            if self.save_max_memory_used:
-                self.max_memory_used_gb_list.append(
-                    psutil.Process().memory_info().peak_wset / 1024**3
-                )
-
        self.set_percent_progress(None)
        self.save_stats(state)
 
@@ -211,7 +209,10 @@ class Bench(Tool, ABC):
        output_tokens,
        **kwargs,
    ):
-        pass
+        """
+        The run_prompt method should append the appropriate value to each of the per prompt
+        measurement statistics lists that are members of the Bench class.
+        """
 
    @staticmethod
    def get_item_or_list(lst):
@@ -246,10 +247,27 @@ class Bench(Tool, ABC):
            Keys.PREFILL_TOKENS_PER_SECOND,
            self.get_item_or_list(self.prefill_tokens_per_second_list),
        )
+        if not all(
+            element is None for element in self.std_dev_prefill_tokens_per_second_list
+        ):
+            state.save_stat(
+                Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+                self.get_item_or_list(self.std_dev_prefill_tokens_per_second_list),
+            )
        state.save_stat(
            Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
            self.get_item_or_list(self.token_generation_tokens_per_second_list),
        )
+        if not all(
+            element is None
+            for element in self.std_dev_token_generation_tokens_per_second_list
+        ):
+            state.save_stat(
+                Keys.STD_DEV_TOKENS_PER_SECOND,
+                self.get_item_or_list(
+                    self.std_dev_token_generation_tokens_per_second_list
+                ),
+            )
        if self.save_max_memory_used:
            state.save_stat(
                Keys.MAX_MEMORY_USED_GBYTE,
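The new run_prompt docstring describes a contract rather than an implementation. A minimal sketch of a subclass honoring it (illustrative only; the timing values are placeholders and the parameter list is elided):

import statistics

class ExampleBench(Bench):
    def run_prompt(self, *args, **kwargs):
        # Suppose per-iteration measurements were collected into these lists
        ttft_samples = [0.21, 0.20, 0.22]   # placeholder seconds to first token
        tps_samples = [41.8, 42.3, 40.9]    # placeholder generation tokens/second
        # The contract: append exactly one value per prompt to each stats list
        self.mean_time_to_first_token_list.append(statistics.mean(ttft_samples))
        self.std_dev_time_to_first_token_list.append(statistics.stdev(ttft_samples))
        self.prefill_tokens_per_second_list.append(None)
        self.std_dev_prefill_tokens_per_second_list.append(None)
        self.token_generation_tokens_per_second_list.append(statistics.mean(tps_samples))
        self.std_dev_token_generation_tokens_per_second_list.append(
            statistics.stdev(tps_samples)
        )

Note that save_stats() above skips the std-dev stats whenever every entry in the corresponding list is None.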
lemonade/tools/flm/utils.py CHANGED
@@ -10,16 +10,46 @@ import time
 from typing import List, Optional
 
 import requests
-from packaging.version import Version
+from packaging.version import Version, InvalidVersion
 
 
-FLM_MINIMUM_VERSION = "0.9.10"
+def get_flm_latest_version() -> Optional[str]:
+    """
+    Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags"
+    This uses the GitHub tags API.
+    """
+    url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        tags = response.json()
+        if not tags:
+            return None
+        # Tags are sorted in reverse chronological order; find the first that looks like a version
+        for tag in tags:
+            tag_name = tag.get("name", "")
+            # Accept tags of the form v0.9.10, 0.9.10, etc.
+            if tag_name.startswith("v"):
+                version_candidate = tag_name[1:]
+            else:
+                version_candidate = tag_name
+            try:
+                # validate it's a version string
+                _ = Version(version_candidate)
+                return version_candidate
+            except InvalidVersion:
+                continue
+        return None
+    except requests.exceptions.RequestException as e:
+        logging.debug("Error retrieving latest FLM version: %s", e)
+        return None
 
 
 def check_flm_version() -> Optional[str]:
     """
     Check if FLM is installed and return version, or None if not available.
     """
+    latest_version_str = get_flm_latest_version()
     try:
         result = subprocess.run(
             ["flm", "version"],
@@ -34,11 +64,11 @@ def check_flm_version() -> Optional[str]:
         output = result.stdout.strip()
         if output.startswith("FLM v"):
             version_str = output[5:]  # Remove "FLM v" prefix
-            return version_str
-        return None
+            return version_str, latest_version_str
+        return None, latest_version_str
 
     except (subprocess.CalledProcessError, FileNotFoundError):
-        return None
+        return None, latest_version_str
 
 
 def refresh_environment():
@@ -76,31 +106,42 @@ def install_flm():
     If not, download and run the GUI installer, then wait for completion.
     """
     # Check current FLM installation
-    current_version = check_flm_version()
+    current_version, latest_version = check_flm_version()
 
-    if current_version and Version(current_version) >= Version(FLM_MINIMUM_VERSION):
+    if (
+        current_version
+        and latest_version
+        and Version(current_version) == Version(latest_version)
+    ):
        logging.info(
-            "FLM v%s is already installed and meets minimum version requirement (v%s)",
+            "FLM v%s is already installed and is up to date (latest version: v%s).",
            current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
        )
        return
 
    if current_version:
+        if not latest_version:
+            logging.info(
+                "Unable to detect the latest FLM version; continuing with installed FLM v%s.",
+                current_version,
+            )
+            return
        logging.info(
-            "FLM v%s is installed but below minimum version v%s. Upgrading...",
+            "FLM v%s is installed but below latest version v%s. Upgrading...",
            current_version,
-            FLM_MINIMUM_VERSION,
+            latest_version,
        )
+        verysilent = True
    else:
-        logging.info(
-            "FLM not found. Installing FLM v%s or later...", FLM_MINIMUM_VERSION
-        )
+        logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
+        verysilent = False
 
    # Download the installer
    # pylint: disable=line-too-long
    installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
    installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
+    installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
 
    try:
        # Remove existing installer if present
@@ -123,13 +164,15 @@ def install_flm():
        # Launch the installer GUI
        logging.warning(
            "Launching FLM installer GUI. Please complete the installation..."
+            if not verysilent
+            else "Installing FLM..."
        )
 
        # Launch installer and wait for it to complete
        if os.name == "nt":  # Windows
-            process = subprocess.Popen([installer_path], shell=True)
+            process = subprocess.Popen(installer_args, shell=True)
        else:
-            process = subprocess.Popen([installer_path])
+            process = subprocess.Popen(installer_args)
 
        # Wait for installer to complete
        process.wait()
@@ -150,8 +193,8 @@ def install_flm():
    # Verify installation
    max_retries = 10
    for attempt in range(max_retries):
-        new_version = check_flm_version()
-        if new_version and Version(new_version) >= Version(FLM_MINIMUM_VERSION):
+        new_version, latest_version = check_flm_version()
+        if new_version and Version(new_version) == Version(latest_version):
            logging.info("FLM v%s successfully installed and verified", new_version)
            return
 
@@ -240,7 +283,12 @@ def get_flm_installed_models() -> List[str]:
 
        return installed_checkpoints
 
-    except (subprocess.CalledProcessError, FileNotFoundError, AttributeError):
+    except (
+        subprocess.CalledProcessError,
+        FileNotFoundError,
+        AttributeError,
+        NotADirectoryError,
+    ):
        # FLM not installed, not available, or output parsing failed
        return []
 
@@ -249,7 +297,7 @@ def is_flm_available() -> bool:
    """
    Check if FLM is available and meets minimum version requirements.
    """
-    current_version = check_flm_version()
-    return current_version is not None and Version(current_version) >= Version(
-        FLM_MINIMUM_VERSION
+    current_version, latest_version = check_flm_version()
+    return current_version is not None and Version(current_version) == Version(
+        latest_version
    )
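Because check_flm_version now returns an (installed, latest) pair rather than a single string, callers compare the two versions directly. A small sketch of the calling pattern (the version strings shown are made up):

from packaging.version import Version
from lemonade.tools.flm.utils import check_flm_version

installed, latest = check_flm_version()  # e.g. ("0.9.10", "0.9.12") or (None, "0.9.12")
if installed is None:
    print("FLM is not installed")
elif latest is None:
    print(f"FLM v{installed} found, but the latest release could not be determined")
elif Version(installed) == Version(latest):
    print(f"FLM v{installed} is up to date")
else:
    print(f"FLM v{installed} installed; latest release is v{latest}")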
lemonade/tools/huggingface/bench.py CHANGED
@@ -1,6 +1,7 @@
 import argparse
 import statistics
 from statistics import StatisticsError
+import psutil
 from lemonade.state import State
 from lemonade.cache import Keys
 from lemonade.tools.bench import Bench
@@ -75,7 +76,7 @@ class HuggingfaceBench(Bench):
         warmup_iterations: int,
         output_tokens: int,
         num_beams: int = default_beams,
-    ) -> State:
+    ):
         """
         We don't have access to the internal timings of generate(), so time to first
         token (TTFT, aka prefill latency) and token/s are calculated using the following formulae:
@@ -176,6 +177,10 @@ class HuggingfaceBench(Bench):
         self.token_generation_tokens_per_second_list.append(
             (mean_token_len - 1) / mean_decode_latency
         )
+        if self.save_max_memory_used:
+            self.max_memory_used_gb_list.append(
+                psutil.Process().memory_info().peak_wset / 1024**3
+            )
 
 
 # This file was originally licensed under Apache 2.0. It has been modified.
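A note on the psutil call added above: peak_wset is the Windows peak working set and is only exposed by psutil's memory_info() on Windows, which matches the existing "max memory used can only be measured on Windows" restriction in bench.py. A hedged sketch of the same measurement:

import platform
import psutil

def peak_memory_gb():
    # Returns peak working-set memory of the current process in GB, or None
    # where psutil does not expose peak_wset (non-Windows platforms).
    if platform.system() != "Windows":
        return None
    return psutil.Process().memory_info().peak_wset / 1024**3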