lemonade-sdk 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
@@ -0,0 +1,263 @@
1
+ import argparse
2
+ import os
3
+ import lemonade.common.build as build
4
+ import lemonade.common.printing as printing
5
+ from lemonade.state import State
6
+ from lemonade.tools import Tool
7
+ from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
8
+ from lemonade.cache import Keys
9
+
10
# Sampling keyword arguments forwarded to model.generate() on every trial.
DEFAULT_GENERATE_PARAMS = {
    "do_sample": True,
    "top_k": 50,
    "top_p": 0.95,
    "temperature": 0.7,
}

# Defaults shared by the CLI parser and the run() signature.
DEFAULT_RANDOM_SEED = 1
DEFAULT_MAX_NEW_TOKENS = 512
DEFAULT_N_TRIALS = 1
20
+
21
+
22
def sanitize_string(input_string):
    """Return input_string with every character the "charmap" codec cannot encode removed."""
    # Round-trip through the codec; "ignore" silently drops unencodable characters.
    encoded = input_string.encode("charmap", errors="ignore")
    return encoded.decode("charmap")
24
+
25
+
26
def sanitize_text(text):
    """
    Sanitize a string, or each string in a list, with sanitize_string().

    Returns a string when given a string and a new list when given a list.
    Raises TypeError for any other input type.
    """
    if isinstance(text, list):
        return list(map(sanitize_string, text))
    if isinstance(text, str):
        return sanitize_string(text)
    raise TypeError("Input must be a string or a list of strings.")
33
+
34
+
35
def positive_int(x):
    """
    Conversion function for argparse: convert x to an int that must be >= 1.

    Raises ValueError when x is not an integer literal or when the value is
    less than one.
    """
    value = int(x)
    if value >= 1:
        return value
    raise ValueError("Non-positive values are not allowed")
41
+
42
+
43
class LLMPrompt(Tool):
    """
    Send a prompt to an LLM instance and print the response to the screen.

    Required input state:
        - state.model: LLM instance that supports the generate() method.
        - state.tokenizer: LLM tokenizer instance that supports the __call__() (ie, encode)
            and decode() methods.

    Output state produced:
        - state.response: list of text responses from the LLM, one per trial.
    """

    unique_name = "llm-prompt"

    def __init__(self):
        super().__init__(monitor_message="Prompting LLM")

        # Stats that this tool saves in run()
        self.status_stats = [
            Keys.PROMPT_TOKENS,
            Keys.PROMPT,
            Keys.PROMPT_TEMPLATE,
            Keys.RESPONSE_TOKENS,
            Keys.RESPONSE,
            Keys.RESPONSE_LENGTHS_HISTOGRAM,
        ]

    @staticmethod
    def parser(add_help: bool = True) -> argparse.ArgumentParser:
        """Build the argparse parser for the llm-prompt tool."""
        parser = __class__.helpful_parser(
            short_description="Prompt an LLM and print the result",
            add_help=add_help,
        )

        parser.add_argument(
            "--prompt",
            "-p",
            help="Input prompt to the LLM. Two formats are supported: "
            "1) str: use a user-provided prompt string, and "
            "2) path/to/prompt.txt: load the prompt from a .txt file.",
            required=True,
        )

        parser.add_argument(
            "--template",
            "-t",
            action="store_true",
            help="Insert the prompt into the model's chat template before processing.",
        )

        parser.add_argument(
            "--max-new-tokens",
            "-m",
            default=DEFAULT_MAX_NEW_TOKENS,
            type=int,
            help=f"Maximum number of new tokens in the response "
            f"(default is {DEFAULT_MAX_NEW_TOKENS})",
        )

        parser.add_argument(
            "--n-trials",
            "-n",
            default=DEFAULT_N_TRIALS,
            type=positive_int,
            help=f"Number of responses the LLM will generate for the prompt "
            f"(useful for testing, default is {DEFAULT_N_TRIALS})",
        )

        # Kept as a string so that the literal "None" can be accepted; parse()
        # converts it to an int or to None.
        parser.add_argument(
            "--random-seed",
            "-r",
            default=str(DEFAULT_RANDOM_SEED),
            help="Positive integer seed for random number generator used in "
            "sampling tokens "
            f"(default is {DEFAULT_RANDOM_SEED}). If the number of trials is "
            "greater than one, then the seed is incremented by one for each "
            "trial. Set to `None` for random, non-repeatable results. This "
            "random seed behavior only applies to models loaded with "
            "`oga-load` or `huggingface-load`.",
        )

        return parser

    def parse(self, state: State, args, known_only=True) -> argparse.Namespace:
        """
        Helper function to parse CLI arguments into the args expected
        by run()

        The prompt argument is replaced by the contents of the file when it
        names an existing .txt file, and the random seed string is converted
        to an int (or to None when it is the literal "None").
        """

        parsed_args = super().parse(state, args, known_only)

        # Decode prompt arg into a string prompt
        if parsed_args.prompt.endswith(".txt") and os.path.exists(parsed_args.prompt):
            with open(parsed_args.prompt, "r", encoding="utf-8") as f:
                parsed_args.prompt = f.read()

        if parsed_args.random_seed == "None":
            parsed_args.random_seed = None
        else:
            parsed_args.random_seed = int(parsed_args.random_seed)

        return parsed_args

    def run(
        self,
        state: State,
        prompt: str = "Hello",
        max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
        n_trials: int = DEFAULT_N_TRIALS,
        template: bool = False,
        random_seed: int = DEFAULT_RANDOM_SEED,
    ) -> State:
        """
        Generate n_trials responses to prompt and record them on state.

        Args:
            state: build state providing model, tokenizer, cache_dir and build_name.
            prompt: prompt text sent to the model.
            max_new_tokens: forwarded to model.generate().
            n_trials: number of responses to generate.
            template: when True, wrap the prompt in a chat template first.
            random_seed: seed forwarded to model.generate(); incremented by one
                after each trial. None is passed through unchanged.

        Returns:
            The same state, with state.response set to the list of response
            texts and the prompt/response stats saved. When n_trials is 1 the
            saved response stats are scalars rather than lists; otherwise a
            histogram of response lengths is also written to the build's
            output directory.
        """

        # Imported lazily so that matplotlib is only loaded when the tool runs
        import matplotlib.pyplot as plt

        model: ModelAdapter = state.model
        tokenizer: TokenizerAdapter = state.tokenizer

        # If template flag is set, then wrap prompt in template
        if template:
            # Embed prompt in model's chat template
            # NOTE(review): the guard checks for a `prompt_template` attribute but
            # the branches below use `chat_template` -- confirm this is intended.
            if not hasattr(tokenizer, "prompt_template"):
                printing.log_warning(
                    "Templates for this model type are not yet implemented."
                )
            elif tokenizer.chat_template:
                # Use the model's built-in chat template if available
                messages_dict = [{"role": "user", "content": prompt}]
                prompt = tokenizer.apply_chat_template(
                    messages_dict, tokenize=False, add_generation_prompt=True
                )
                state.save_stat(Keys.PROMPT_TEMPLATE, "Model-specific")
            else:
                # Fallback to a standardized template
                printing.log_info("No chat template found. Using default template.")
                prompt = f"<|user|>\n{prompt} <|end|>\n<|assistant|>"
                state.save_stat(Keys.PROMPT_TEMPLATE, "Default")

        input_ids = tokenizer(prompt, return_tensors="pt").input_ids

        len_tokens_out = []
        response_texts = []
        prompt_tokens = None  # will be determined in generate function
        for trial in range(n_trials):
            if n_trials > 1:
                self.set_percent_progress(100.0 * trial / n_trials)

            # Get the response from the LLM, which may include the prompt in it
            response = model.generate(
                input_ids,
                max_new_tokens=max_new_tokens,
                random_seed=random_seed,
                **DEFAULT_GENERATE_PARAMS,
            )

            # Increment random seed if not none
            if random_seed is not None:
                random_seed += 1

            # Flatten the input and response
            if isinstance(input_ids, (list, str)):
                input_ids_array = input_ids
            elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
                # 1-D array from newer OGA versions - already flat
                input_ids_array = input_ids
            else:
                # 2-D tensor from HF models - take first row
                input_ids_array = input_ids[0]

            response_array = response if isinstance(response, str) else response[0]

            prompt_tokens = model.prompt_tokens
            len_tokens_out.append(model.response_tokens)

            # Remove the input from the response
            # (up to the point they diverge, which they should not).
            # The scan is bounded by the shorter of the two sequences so that a
            # response shorter than the prompt cannot raise an IndexError.
            max_prefix = min(len(input_ids_array), len(response_array))
            counter = 0
            while (
                counter < max_prefix
                and input_ids_array[counter] == response_array[counter]
            ):
                counter += 1

            # Only decode the actual response (not the prompt)
            response_text = tokenizer.decode(
                response_array[counter:], skip_special_tokens=True
            ).strip()
            response_texts.append(response_text)

        # state.response always holds the full list, even for a single trial
        state.response = response_texts

        if n_trials == 1:
            # Save scalars rather than one-element lists in the stats
            len_tokens_out = len_tokens_out[0]
            response_texts = response_texts[0]
        else:
            self.set_percent_progress(None)

            # Plot data
            figure = plt.figure()
            try:
                plt.hist(len_tokens_out, bins=20)
                plt.xlabel("Response Length (tokens)")
                plt.ylabel("Frequency")
                plt.title(f"Histogram of Response Lengths\n{state.build_name}")
                figure_path = os.path.join(
                    build.output_dir(state.cache_dir, state.build_name),
                    "response_lengths.png",
                )
                plt.savefig(figure_path)
            finally:
                # pyplot keeps every figure alive until it is closed explicitly
                plt.close(figure)
            state.save_stat(Keys.RESPONSE_LENGTHS_HISTOGRAM, figure_path)

        state.save_stat(Keys.PROMPT_TOKENS, prompt_tokens)
        state.save_stat(Keys.PROMPT, prompt)
        state.save_stat(Keys.RESPONSE_TOKENS, len_tokens_out)
        state.save_stat(Keys.RESPONSE, sanitize_text(response_texts))

        return state
260
+
261
+
262
+ # This file was originally licensed under Apache 2.0. It has been modified.
263
+ # Modifications Copyright (c) 2025 AMD
File without changes
@@ -0,0 +1,203 @@
1
+ import argparse
2
+ import csv
3
+ import os
4
+ from pathlib import Path
5
+ import re
6
+ from typing import List
7
+ import lemonade.common.printing as printing
8
+ import lemonade.common.filesystem as fs
9
+ from lemonade.tools.management_tools import ManagementTool
10
+ from lemonade.cache import DEFAULT_CACHE_DIR
11
+ from lemonade.tools.report.table import LemonadeTable, LemonadePerfTable
12
+
13
+
14
class LemonadeReport(ManagementTool):
    """
    Analyzes the input lemonade cache(s) and produces an aggregated report
    in csv format that contains the build stats for all builds in all cache(s).

    In addition, summary information is printed to the console and saved to a text file.

    When the --perf flag is used, then a performance report is generated that summarizes
    the performance data for different models.  In this case, only the data used in the
    text table is saved to the csv format file.
    """

    unique_name = "report"

    @staticmethod
    def parser(add_help: bool = True) -> argparse.ArgumentParser:
        """Build the argparse parser for the report tool."""
        parser = __class__.helpful_parser(
            short_description="Export statistics from each lemonade run to a CSV file",
            add_help=add_help,
        )

        parser.add_argument(
            "-i",
            "--input-caches",
            nargs="*",
            default=[DEFAULT_CACHE_DIR],
            help=(
                "One or more lemonade cache directories to use to generate the report "
                f"(defaults to {DEFAULT_CACHE_DIR})"
            ),
        )

        parser.add_argument(
            "-o",
            "--output-dir",
            help="Path to folder where reports will be saved "
            "(defaults to current working directory)",
            required=False,
            default=os.getcwd(),
        )

        parser.add_argument(
            "--no-save",
            action="store_true",
            help="Don't save output to TXT and CSV files",
        )

        parser.add_argument(
            "--perf",
            action="store_true",
            help="Produce the performance table instead of the regular table",
        )

        parser.add_argument(
            "--device",
            default=None,
            help="In the --perf table, only include output for the specified device "
            "(e.g., cpu, igpu, npu, hybrid)",
        )

        parser.add_argument(
            "--dtype",
            default=None,
            help="In the --perf table, only include output for the specified datatype "
            "(e.g., float32, int4)",
        )

        parser.add_argument(
            "--model",
            default=None,
            help="In the --perf table, only include output for builds with name that contains"
            " the specified string (e.g., Llama).  The string match is case-insensitive.",
        )

        parser.add_argument(
            "--days",
            "-d",
            type=int,
            metavar="N",
            default=None,
            help="In the --perf table, only include output for builds from the last N days.",
        )

        parser.add_argument(
            "--merge",
            action="store_true",
            help="In the --perf table, merge results from different builds into the same row "
            "as long as model, device, datatype, system info and package version are the same.",
        )

        parser.add_argument(
            "--lean",
            action="store_true",
            help="In the --perf table, don't include the system info and sw package versions.",
        )

        return parser

    def parse(self, args, known_only=True) -> argparse.Namespace:
        """
        Helper function to parse CLI arguments into the args expected by run()

        Raises ValueError when a --perf-only option (--device, --dtype, --days,
        --merge, --lean) is given without --perf.
        """
        parsed_args = super().parse(args, known_only)

        if not parsed_args.perf:
            # Check that none of the perf specific flags are set
            perf_args = [
                parsed_args.device,
                parsed_args.dtype,
                parsed_args.days,
                parsed_args.merge,
                parsed_args.lean,
            ]
            if not all(arg is None or arg is False for arg in perf_args):
                raise ValueError(
                    "Invalid arguments for regular report. Did you miss the --perf argument?"
                    "  See `lemonade report -h` for help."
                )

        return parsed_args

    def run(
        self,
        _,
        input_caches: List[str] = None,
        output_dir: str = os.getcwd(),
        no_save: bool = False,
        perf: bool = False,
        device: str = None,
        dtype: str = None,
        model: str = None,
        days: int = None,
        merge: bool = False,
        lean: bool = False,
    ):
        """
        Build the report table from the cached builds, print it, and save it.

        Args:
            _: unused positional argument.
            input_caches: cache directories to scan; None falls back to
                [DEFAULT_CACHE_DIR], matching the CLI default.
            output_dir: folder where the CSV and TXT reports are written. The
                default is the working directory at the time this module was
                imported.
            no_save: when True, only print the table and write no files.
            perf: when True, build a LemonadePerfTable instead of a LemonadeTable.
            device, dtype, days, merge, lean: forwarded to LemonadePerfTable.
            model: forwarded to LemonadePerfTable and to find_builds().

        Returns:
            None. Returns early, after logging a message, when no stats are found.
        """
        # Process input arguments
        if input_caches is None:
            # The declared default is None; iterating it would raise a TypeError
            input_caches = [DEFAULT_CACHE_DIR]
        cache_dirs = [os.path.expanduser(cache_dir) for cache_dir in input_caches]
        cache_dirs = fs.expand_inputs(cache_dirs)
        report_dir = os.path.expanduser(output_dir)

        if perf:
            table = LemonadePerfTable(device, dtype, model, days, merge, lean)
        else:
            table = LemonadeTable()

        # Find builds and load stats
        table.find_builds(cache_dirs, model)
        table.load_stats()
        table.sort_stats()

        # Print message if there are no stats
        if len(table.all_stats) == 0:
            printing.log_info("No relevant cached build data found")
            return

        # Print table to stdout
        print()
        print(table)

        if no_save:
            return

        # Name report file
        report_path = os.path.join(report_dir, table.get_report_name())
        # Escape the dot so that only a literal ".csv" extension is replaced
        txt_path = re.sub(r"\.csv$", ".txt", report_path)
        Path(report_dir).mkdir(parents=True, exist_ok=True)

        # Create the report to save to CSV
        report, column_headers = table.create_csv_report()

        # Populate results spreadsheet
        with open(report_path, "w", newline="", encoding="utf8") as spreadsheet:
            writer = csv.writer(spreadsheet)
            writer.writerow(column_headers)
            for entry in report:
                writer.writerow([entry[col] for col in column_headers])

        # Save the text report
        with open(txt_path, "w", encoding="utf-8") as file:
            print(table, file=file)

        # Print message with the output file path
        printing.log("Report text saved at ")
        printing.logn(str(txt_path), printing.Colors.OKGREEN)
        printing.log("Report spreadsheet saved at ")
        printing.logn(str(report_path), printing.Colors.OKGREEN)
200
+
201
+
202
+ # This file was originally licensed under Apache 2.0. It has been modified.
203
+ # Modifications Copyright (c) 2025 AMD