lemonade_sdk-9.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
lemonade/tools/bench.py
@@ -0,0 +1,302 @@
+ from abc import ABC, abstractmethod
+ import argparse
+ import os
+ import platform
+ from lemonade.state import State
+ from lemonade.tools import Tool
+ from lemonade.cache import Keys
+
+ default_iterations = 10
+ default_warmup_runs = 5
+ default_prompt_length = 64
+ default_output_tokens = 32
+ default_prompt = "Hello, I am conscious and"
+
+
+ class Bench(Tool, ABC):
+     """
+     Abstract parent class for tools that benchmark the performance of the generate()
+     method of an LLM.
+     """
+
+     def __init__(self, monitor_message="Benchmarking LLM"):
+         super().__init__(monitor_message)
+
+         # The minimum set of statistics that a benchmark tool will produce
+         # Inherited tools should append any additional statistics they generate to this list
+         self.status_stats = [
+             Keys.SECONDS_TO_FIRST_TOKEN,
+             Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
+             Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+             Keys.STD_DEV_TOKENS_PER_SECOND,
+             Keys.PREFILL_TOKENS_PER_SECOND,
+             Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+             Keys.PROMPT_TOKENS,
+             Keys.RESPONSE_TOKENS,
+             Keys.MAX_MEMORY_USED_GBYTE,
+         ]
+
+         # Minimum per measurement statistics
+         # Inherited tools should add additional lists for other per prompt statistics
+         self.input_ids_len_list = []
+         self.tokens_out_len_list = []
+         self.mean_time_to_first_token_list = []
+         self.std_dev_time_to_first_token_list = []
+         self.prefill_tokens_per_second_list = []
+         self.std_dev_prefill_tokens_per_second_list = []
+         self.token_generation_tokens_per_second_list = []
+         self.std_dev_token_generation_tokens_per_second_list = []
+         self.max_memory_used_gb_list = []
+
+         # Max memory used can only be measured on Windows systems
+         self.save_max_memory_used = platform.system() == "Windows"
+
+         # This is set to True only for the duration of the first call to run_prompt
+         self.first_run_prompt = None
+
+     @staticmethod
+     def parser(parser: argparse.ArgumentParser = None, add_help: bool = True):
+         # Allow inherited classes to initialize and pass in a parser, add parameters to it if so
+         if parser is None:
+             parser = __class__.helpful_parser(
+                 short_description="Benchmark an LLM", add_help=add_help
+             )
+
+         parser.add_argument(
+             "--iterations",
+             "-i",
+             required=False,
+             type=int,
+             default=default_iterations,
+             help="Number of benchmarking iterations to run (default: "
+             f"{default_iterations})",
+         )
+
+         parser.add_argument(
+             "--warmup-iterations",
+             "-w",
+             required=False,
+             type=int,
+             default=default_warmup_runs,
+             help="Number of benchmarking iterations to use for cache warmup "
+             "(the results of these iterations "
+             f"are not included in the results; default: {default_warmup_runs})",
+         )
+
+         parser.add_argument(
+             "--prompts",
+             "-p",
+             nargs="+",
+             required=False,
+             default=[str(default_prompt_length)],
+             metavar="PROMPT",
+             help="Input one or more prompts to the LLM. Three formats are supported. "
+             "1) integer: use a synthetic prompt with the specified token length "
+             "2) str: use a user-provided prompt string "
+             "3) path/to/prompt.txt: load the prompt from a text file. "
+             f"(default: {default_prompt_length}) ",
+         )
+
+         parser.add_argument(
+             "--output-tokens",
+             required=False,
+             type=int,
+             default=default_output_tokens,
+             help="Number of new tokens the LLM should make (default: "
+             f"{default_output_tokens})",
+         )
+
+         return parser
+
+     def get_prompt_str(self, _state, token_length):
+         """
+         Returns a string with approximately the prescribed token length.
+         Note: Actual token length is dependent on the tokenizer.
+         """
+         return "word " * (token_length - 1)
+
+     def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
+         """
+         Helper function to parse CLI arguments into the args expected by run()
+         """
+
+         parsed_args = super().parse(state, args, known_only)
+
+         if parsed_args.prompts is None:
+             parsed_args.prompts = [str(default_prompt_length)]
+
+         # Decode prompt arg into a list of prompt strings
+         prompt_strings = []
+         for prompt_item in parsed_args.prompts:
+             if prompt_item.isdigit():
+                 # Generate a prompt with the requested length
+                 token_length = int(prompt_item)
+                 prompt_strings.append(self.get_prompt_str(state, token_length))
+
+             elif os.path.exists(prompt_item):
+                 with open(prompt_item, "r", encoding="utf-8") as f:
+                     prompt_strings.append(f.read())
+
+             else:
+                 # No change to the prompt
+                 prompt_strings.append(prompt_item)
+         parsed_args.prompts = prompt_strings
+
+         return parsed_args
+
+     def run(
+         self,
+         state: State,
+         prompts: list[str] = None,
+         iterations: int = default_iterations,
+         warmup_iterations: int = default_warmup_runs,
+         output_tokens: int = default_output_tokens,
+         **kwargs,
+     ) -> State:
+         """
+         Args:
+             - prompts: List of input prompts used as starting points for LLM text generation
+             - iterations: number of benchmarking samples to take; results are
+                 reported as the median and mean of the samples.
+             - warmup_iterations: subset of the iterations to treat as warmup,
+                 and not included in the results.
+             - output_tokens: Number of new tokens for the LLM to create.
+             - kwargs: Additional parameters used by bench tools
+         """
+
+         if prompts is None:
+             prompts = ["word " * (default_prompt_length - 2)]
+         elif isinstance(prompts, str):
+             prompts = [prompts]
+
+         state.save_stat("prompts", prompts)
+         state.save_stat("iterations", iterations)
+         state.save_stat("warmup_iterations", warmup_iterations)
+         state.save_stat("output_tokens", output_tokens)
+
+         counter = 0
+         report_progress_fn = lambda x: self.set_percent_progress(
+             100 * (counter + x) / len(prompts)
+         )
+         self.first_run_prompt = True
+         for counter, prompt in enumerate(prompts):
+             report_progress_fn(0)
+
+             self.run_prompt(
+                 state,
+                 report_progress_fn,
+                 prompt,
+                 iterations,
+                 warmup_iterations,
+                 output_tokens,
+                 **kwargs,
+             )
+             self.first_run_prompt = False
+
+         self.set_percent_progress(None)
+         self.save_stats(state)
+
+         return state
+
+     @abstractmethod
+     def run_prompt(
+         self,
+         state,
+         report_progress_fn,
+         prompt,
+         iterations,
+         warmup_iterations,
+         output_tokens,
+         **kwargs,
+     ):
+         """
+         The run_prompt method should append the appropriate value to each of the per prompt
+         measurement statistics lists that are members of the Bench class.
+         """
+
+     @staticmethod
+     def get_item_or_list(lst):
+         """
+         If the list is just a single item then return the item, else return the list
+         """
+         if len(lst) == 1:
+             return lst[0]
+         else:
+             return lst
+
+     def save_stats(self, state):
+         # Save performance data to stats
+         state.save_stat(
+             Keys.PROMPT_TOKENS, self.get_item_or_list(self.input_ids_len_list)
+         )
+         state.save_stat(
+             Keys.RESPONSE_TOKENS, self.get_item_or_list(self.tokens_out_len_list)
+         )
+         state.save_stat(
+             Keys.SECONDS_TO_FIRST_TOKEN,
+             self.get_item_or_list(self.mean_time_to_first_token_list),
+         )
+         if not all(
+             element is None for element in self.std_dev_time_to_first_token_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
+                 self.get_item_or_list(self.std_dev_time_to_first_token_list),
+             )
+         state.save_stat(
+             Keys.PREFILL_TOKENS_PER_SECOND,
+             self.get_item_or_list(self.prefill_tokens_per_second_list),
+         )
+         if not all(
+             element is None for element in self.std_dev_prefill_tokens_per_second_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+                 self.get_item_or_list(self.std_dev_prefill_tokens_per_second_list),
+             )
+         state.save_stat(
+             Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
+             self.get_item_or_list(self.token_generation_tokens_per_second_list),
+         )
+         if not all(
+             element is None
+             for element in self.std_dev_token_generation_tokens_per_second_list
+         ):
+             state.save_stat(
+                 Keys.STD_DEV_TOKENS_PER_SECOND,
+                 self.get_item_or_list(
+                     self.std_dev_token_generation_tokens_per_second_list
+                 ),
+             )
+         if self.save_max_memory_used:
+             state.save_stat(
+                 Keys.MAX_MEMORY_USED_GBYTE,
+                 self.get_item_or_list(self.max_memory_used_gb_list),
+             )
+
+     @staticmethod
+     def not_enough_tokens(output_tokens: int):
+         """
+         Raise an exception that explains why a benchmark did not produce any results
+         """
+
+         raise ValueError(
+             "Your model was benchmarked, however none of the benchmarking "
+             "iterations produced the requested amount of output tokens "
+             f"(currently {output_tokens}), so "
+             "the results have been discarded. You have the following options "
+             "to solve this: \n"
+             "1. Use the -p option to change the prompt to something that will "
+             "produce more output tokens. For example, 'The extremely long "
+             "story of my life, told in excruciating details is:' "
+             "is an example of a prompt that will result in a lot of output. \n"
+             "2. Set a lower value for --output-tokens to make it more likely "
+             "that the model will produce enough. \n"
+             "3. Set more verbose hyperparameters. \n"
+             "4. Run more benchmarking iterations, to improve the chance of "
+             "getting at least one with enough output tokens. \n"
+         )
+
+
+ # This file was originally licensed under Apache 2.0. It has been modified.
+ # Modifications Copyright (c) 2025 AMD
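For orientation, the sketch below shows how a concrete tool might subclass the Bench class from the hunk above. It is illustrative only: the EchoBench name, the placeholder measurements, and the assumption that run_prompt is the only method a subclass must implement are not taken from the package.

# Hypothetical example (not from the package): a minimal Bench subclass that
# records placeholder measurements for each prompt.
import time

from lemonade.state import State
from lemonade.tools.bench import Bench


class EchoBench(Bench):
    def __init__(self):
        super().__init__(monitor_message="Benchmarking echo model")

    def run_prompt(
        self,
        state: State,
        report_progress_fn,
        prompt,
        iterations,
        warmup_iterations,
        output_tokens,
        **kwargs,
    ):
        start = time.perf_counter()
        # A real tool would run warmup_iterations + iterations generate() calls here.
        elapsed = time.perf_counter() - start
        prompt_tokens = len(prompt.split())

        # Append one entry per prompt to the per-prompt statistics lists that
        # Bench.save_stats() reads at the end of Bench.run().
        self.input_ids_len_list.append(prompt_tokens)
        self.tokens_out_len_list.append(output_tokens)
        self.mean_time_to_first_token_list.append(elapsed)
        self.std_dev_time_to_first_token_list.append(None)
        self.prefill_tokens_per_second_list.append(prompt_tokens / max(elapsed, 1e-9))
        self.std_dev_prefill_tokens_per_second_list.append(None)
        self.token_generation_tokens_per_second_list.append(float(output_tokens))
        self.std_dev_token_generation_tokens_per_second_list.append(None)
        if self.save_max_memory_used:
            self.max_memory_used_gb_list.append(0.0)
        report_progress_fn(1.0)

Under these assumptions, Bench.run() drives the subclass: it iterates over the prompts, calls run_prompt() once per prompt, and finishes by writing the collected lists to the state via save_stats().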
lemonade/tools/flm/__init__.py
@@ -0,0 +1 @@
+ # FLM (FastFlowLM) utilities for Lemonade SDK
lemonade/tools/flm/utils.py
@@ -0,0 +1,305 @@
+ """
+ FLM (FastFlowLM) utilities for installation, version checking, and model management.
+ """
+
+ import os
+ import logging
+ import subprocess
+ import tempfile
+ import time
+ from typing import List, Optional
+
+ import requests
+ from packaging.version import Version, InvalidVersion
+
+
+ def get_flm_latest_version() -> Optional[str]:
+     """
+     Get and return the latest FLM version from "https://github.com/FastFlowLM/FastFlowLM/tags".
+     This uses the GitHub tags API.
+     """
+     url = "https://api.github.com/repos/FastFlowLM/FastFlowLM/tags"
+     try:
+         response = requests.get(url, timeout=10)
+         response.raise_for_status()
+         tags = response.json()
+         if not tags:
+             return None
+         # Tags are sorted in reverse chronological order; find the first that looks like a version
+         for tag in tags:
+             tag_name = tag.get("name", "")
+             # Accept tags of the form v0.9.10, 0.9.10, etc.
+             if tag_name.startswith("v"):
+                 version_candidate = tag_name[1:]
+             else:
+                 version_candidate = tag_name
+             try:
+                 # validate it's a version string
+                 _ = Version(version_candidate)
+                 return version_candidate
+             except InvalidVersion:
+                 continue
+         return None
+     except requests.exceptions.RequestException as e:
+         logging.debug("Error retrieving latest FLM version: %s", e)
+         return None
+
+
+ def check_flm_version() -> Optional[str]:
+     """
+     Return a (installed_version, latest_version) tuple; installed_version is None if FLM is not installed.
+     """
+     latest_version_str = get_flm_latest_version()
+     try:
+         result = subprocess.run(
+             ["flm", "version"],
+             capture_output=True,
+             text=True,
+             check=True,
+             encoding="utf-8",
+             errors="replace",
+         )
+
+         # Parse version from output like "FLM v0.9.4"
+         output = result.stdout.strip()
+         if output.startswith("FLM v"):
+             version_str = output[5:]  # Remove "FLM v" prefix
+             return version_str, latest_version_str
+         return None, latest_version_str
+
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return None, latest_version_str
+
+
+ def refresh_environment():
+     """
+     Refresh PATH to pick up newly installed executables.
+     """
+     if os.name == "nt":  # Windows
+         # On Windows, we need to refresh the PATH from registry
+         import winreg
+
+         try:
+             with winreg.OpenKey(
+                 winreg.HKEY_LOCAL_MACHINE,
+                 r"SYSTEM\CurrentControlSet\Control\Session Manager\Environment",
+             ) as key:
+                 path_value, _ = winreg.QueryValueEx(key, "PATH")
+                 os.environ["PATH"] = path_value + ";" + os.environ.get("PATH", "")
+         except Exception as e:  # pylint: disable=broad-except
+             logging.debug("Could not refresh PATH from registry: %s", e)
+
+         # Also try to add common installation paths
+         common_paths = [
+             r"C:\Program Files\FLM",
+             r"C:\Program Files (x86)\FLM",
+             os.path.expanduser(r"~\AppData\Local\FLM"),
+         ]
+         for path in common_paths:
+             if os.path.exists(path) and path not in os.environ.get("PATH", ""):
+                 os.environ["PATH"] = path + ";" + os.environ.get("PATH", "")
+
+
+ def install_flm():
+     """
+     Check if FLM is installed and at minimum version.
+     If not, download and run the GUI installer, then wait for completion.
+     """
+     # Check current FLM installation
+     current_version, latest_version = check_flm_version()
+
+     if (
+         current_version
+         and latest_version
+         and Version(current_version) == Version(latest_version)
+     ):
+         logging.info(
+             "FLM v%s is already installed and is up to date (latest version: v%s).",
+             current_version,
+             latest_version,
+         )
+         return
+
+     if current_version:
+         if not latest_version:
+             logging.info(
+                 "Unable to detect the latest FLM version; continuing with installed FLM v%s.",
+                 current_version,
+             )
+             return
+         logging.info(
+             "FLM v%s is installed but below latest version v%s. Upgrading...",
+             current_version,
+             latest_version,
+         )
+         verysilent = True
+     else:
+         logging.info("FLM not found. Installing FLM v%s or later...", latest_version)
+         verysilent = False
+
+     # Download the installer
+     # pylint: disable=line-too-long
+     installer_url = "https://github.com/FastFlowLM/FastFlowLM/releases/latest/download/flm-setup.exe"
+     installer_path = os.path.join(tempfile.gettempdir(), "flm-setup.exe")
+     installer_args = [installer_path, "/VERYSILENT"] if verysilent else [installer_path]
+
+     try:
+         # Remove existing installer if present
+         if os.path.exists(installer_path):
+             os.remove(installer_path)
+
+         logging.info("Downloading FLM installer...")
+         response = requests.get(installer_url, stream=True, timeout=30)
+         response.raise_for_status()
+
+         # Save installer to disk
+         with open(installer_path, "wb") as f:
+             for chunk in response.iter_content(chunk_size=8192):
+                 f.write(chunk)
+             f.flush()
+             os.fsync(f.fileno())
+
+         logging.info("Downloaded FLM installer to %s", installer_path)
+
+         # Launch the installer GUI
+         logging.warning(
+             "Launching FLM installer GUI. Please complete the installation..."
+             if not verysilent
+             else "Installing FLM..."
+         )
+
+         # Launch installer and wait for it to complete
+         if os.name == "nt":  # Windows
+             process = subprocess.Popen(installer_args, shell=True)
+         else:
+             process = subprocess.Popen(installer_args)
+
+         # Wait for installer to complete
+         process.wait()
+
+         if process.returncode != 0:
+             raise RuntimeError(
+                 f"FLM installer failed with exit code {process.returncode}"
+             )
+
+         logging.info("FLM installer completed successfully")
+
+         # Refresh environment to pick up new PATH entries
+         refresh_environment()
+
+         # Wait a moment for system to update
+         time.sleep(2)
+
+         # Verify installation
+         max_retries = 10
+         for attempt in range(max_retries):
+             new_version, latest_version = check_flm_version()
+             if new_version and Version(new_version) == Version(latest_version):
+                 logging.info("FLM v%s successfully installed and verified", new_version)
+                 return
+
+             if attempt < max_retries - 1:
+                 logging.debug(
+                     "FLM not yet available in PATH, retrying... (attempt %d/%d)",
+                     attempt + 1,
+                     max_retries,
+                 )
+                 time.sleep(3)
+                 refresh_environment()
+
+         # Final check failed
+         raise RuntimeError(
+             "FLM installation completed but 'flm' command is not available in PATH. "
+             "Please ensure FLM is properly installed and available in your system PATH."
+         )
+
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download FLM installer: {e}") from e
+     except Exception as e:
+         raise RuntimeError(f"FLM installation failed: {e}") from e
+     finally:
+         # Clean up installer file
+         if os.path.exists(installer_path):
+             try:
+                 os.remove(installer_path)
+             except OSError:
+                 pass  # Ignore cleanup errors
+
+
+ def download_flm_model(config_checkpoint, _=None, do_not_upgrade=False) -> dict:
+     """
+     Downloads the FLM model for the given configuration.
+
+     Args:
+         config_checkpoint: name of the FLM model to install.
+         _: placeholder for `config_mmproj`, which is standard
+             for WrappedServer (see llamacpp/utils.py).
+         do_not_upgrade: if True, skip re-downloading the model when it is
+             already available.
+     """
+
+     if do_not_upgrade:
+         command = ["flm", "pull", f"{config_checkpoint}"]
+     else:
+         command = ["flm", "pull", f"{config_checkpoint}", "--force"]
+
+     subprocess.run(command, check=True)
+
+
+ def get_flm_installed_models() -> List[str]:
+     """
+     Parse FLM model list and return installed model checkpoints.
+
+     Uses the improved FLM CLI methodology with --filter and --quiet flags
+     for cleaner, more reliable output parsing.
+
+     Returns:
+         List of installed FLM model checkpoints (e.g., ["llama3.2:1b", "gemma3:4b"])
+     """
+     try:
+         result = subprocess.run(
+             ["flm", "list", "--filter", "installed", "--quiet"],
+             capture_output=True,
+             text=True,
+             check=True,
+             encoding="utf-8",
+             errors="replace",
+         )
+
+         # Check if we got valid output
+         if not result.stdout:
+             return []
+
+         installed_checkpoints = []
+
+         lines = result.stdout.strip().split("\n")
+         for line in lines:
+             line = line.strip()
+             # Skip the "Models:" header line
+             if line == "Models:" or not line:
+                 continue
+             # Parse model checkpoint (format: " - modelname:tag")
+             if line.startswith("- "):
+                 checkpoint = line[2:].strip()
+                 installed_checkpoints.append(checkpoint)
+
+         return installed_checkpoints
+
+     except (
+         subprocess.CalledProcessError,
+         FileNotFoundError,
+         AttributeError,
+         NotADirectoryError,
+     ):
+         # FLM not installed, not available, or output parsing failed
+         return []
+
+
+ def is_flm_available() -> bool:
+     """
+     Check if FLM is available and matches the latest released version.
+     """
+     current_version, latest_version = check_flm_version()
+     return current_version is not None and Version(current_version) == Version(
+         latest_version
+     )
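As a usage illustration only (not code from the package), the FLM helpers in the hunk above could be chained into a simple "make sure this model is ready" routine. The ensure_flm_model function and its llama3.2:1b default are hypothetical; the model name is borrowed from the get_flm_installed_models docstring.

# Hypothetical workflow sketch built from the helpers shown above.
from lemonade.tools.flm.utils import (
    download_flm_model,
    get_flm_installed_models,
    install_flm,
    is_flm_available,
)


def ensure_flm_model(checkpoint: str = "llama3.2:1b") -> None:
    """Install FLM if needed, then make sure `checkpoint` is pulled locally."""
    if not is_flm_available():
        # Downloads and runs the FLM installer, then verifies `flm` is on PATH;
        # raises RuntimeError if installation cannot be verified.
        install_flm()

    if checkpoint not in get_flm_installed_models():
        # do_not_upgrade=True pulls the model only if it is not already present.
        download_flm_model(checkpoint, do_not_upgrade=True)

Note that install_flm() downloads a Windows installer (flm-setup.exe) and may refresh PATH from the Windows registry, so this sketch assumes a Windows host.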