lemonade-sdk 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/__init__.py +5 -0
- lemonade/api.py +180 -0
- lemonade/cache.py +92 -0
- lemonade/cli.py +173 -0
- lemonade/common/__init__.py +0 -0
- lemonade/common/build.py +176 -0
- lemonade/common/cli_helpers.py +139 -0
- lemonade/common/exceptions.py +98 -0
- lemonade/common/filesystem.py +368 -0
- lemonade/common/inference_engines.py +408 -0
- lemonade/common/network.py +93 -0
- lemonade/common/printing.py +110 -0
- lemonade/common/status.py +471 -0
- lemonade/common/system_info.py +1411 -0
- lemonade/common/test_helpers.py +28 -0
- lemonade/profilers/__init__.py +1 -0
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/profilers/memory_tracker.py +259 -0
- lemonade/profilers/profiler.py +58 -0
- lemonade/sequence.py +363 -0
- lemonade/state.py +159 -0
- lemonade/tools/__init__.py +1 -0
- lemonade/tools/accuracy.py +432 -0
- lemonade/tools/adapter.py +114 -0
- lemonade/tools/bench.py +302 -0
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +305 -0
- lemonade/tools/huggingface/bench.py +187 -0
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/huggingface/utils.py +359 -0
- lemonade/tools/humaneval.py +264 -0
- lemonade/tools/llamacpp/bench.py +255 -0
- lemonade/tools/llamacpp/load.py +222 -0
- lemonade/tools/llamacpp/utils.py +1260 -0
- lemonade/tools/management_tools.py +319 -0
- lemonade/tools/mmlu.py +319 -0
- lemonade/tools/oga/__init__.py +0 -0
- lemonade/tools/oga/bench.py +120 -0
- lemonade/tools/oga/load.py +804 -0
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/oga/utils.py +462 -0
- lemonade/tools/perplexity.py +147 -0
- lemonade/tools/prompt.py +263 -0
- lemonade/tools/report/__init__.py +0 -0
- lemonade/tools/report/llm_report.py +203 -0
- lemonade/tools/report/table.py +899 -0
- lemonade/tools/server/__init__.py +0 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +320 -0
- lemonade/tools/server/serve.py +2123 -0
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/index.html +279 -0
- lemonade/tools/server/static/js/chat.js +1059 -0
- lemonade/tools/server/static/js/model-settings.js +183 -0
- lemonade/tools/server/static/js/models.js +1395 -0
- lemonade/tools/server/static/js/shared.js +556 -0
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +2654 -0
- lemonade/tools/server/static/webapp.html +321 -0
- lemonade/tools/server/tool_calls.py +153 -0
- lemonade/tools/server/tray.py +664 -0
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/port.py +77 -0
- lemonade/tools/server/utils/thread.py +85 -0
- lemonade/tools/server/utils/windows_tray.py +408 -0
- lemonade/tools/server/webapp.py +34 -0
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/tools/tool.py +374 -0
- lemonade/version.py +1 -0
- lemonade_install/__init__.py +1 -0
- lemonade_install/install.py +239 -0
- lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
- lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
- lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
- lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
- lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
- lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
- lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
- lemonade_server/cli.py +805 -0
- lemonade_server/model_manager.py +758 -0
- lemonade_server/pydantic_models.py +159 -0
- lemonade_server/server_models.json +643 -0
- lemonade_server/settings.py +39 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import statistics
|
|
3
|
+
from statistics import StatisticsError
|
|
4
|
+
import psutil
|
|
5
|
+
from lemonade.state import State
|
|
6
|
+
from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
|
|
7
|
+
from lemonade.tools.bench import Bench
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OgaBench(Bench):
    """
    Benchmark tool for LLMs running in onnxruntime-genai (OGA).

    Works with any model object that adheres to the ModelAdapter interface.

    Required input state:
        - MODEL: model instance to benchmark.
        - TOKENIZER: tokenizer instance used to generate inputs for the model.

    Output state produced: None
    """

    unique_name = "oga-bench"

    @staticmethod
    def parser(add_help: bool = True) -> argparse.ArgumentParser:
        """
        Build the command-line parser for this tool.

        The parser is created by this class's helpful_parser and then handed
        to Bench.parser; whatever that call returns is the result.
        """
        oga_parser = __class__.helpful_parser(
            short_description="Benchmark an LLM in onnxruntime-genai (OGA)",
            add_help=add_help,
        )

        return Bench.parser(oga_parser)

    def get_prompt_str(self, state, token_length):
        """
        Build a prompt made of repeated "word " that targets token_length tokens.

        A first candidate of (token_length - 1) repetitions is tokenized with
        state.tokenizer. If the measured token count differs from the target,
        the repetition count is corrected by that difference (never going
        below zero) and the corrected string is returned without being
        re-tokenized.
        """
        tokenizer: TokenizerAdapter = state.tokenizer
        candidate = "word " * (token_length - 1)

        # Measure how many tokens the candidate actually produces
        measured_length = len(tokenizer(candidate, return_tensors="pt").input_ids)
        overshoot = measured_length - token_length

        if overshoot != 0:
            # Positive overshoot drops words, negative overshoot adds words
            return "word " * max(token_length - 1 - overshoot, 0)
        return candidate

    def run_prompt(
        self,
        state: State,
        report_progress_fn,
        prompt: str,
        iterations: int,
        warmup_iterations: int,
        output_tokens: int,
    ):
        """
        Benchmark one prompt and append the results to this tool's result lists.

        The prompt is tokenized once, run warmup_iterations times without
        collecting timings, and then run iterations times with timings read
        from the model's time_to_first_token and tokens_per_second attributes.
        Only iterations whose response_tokens reaches output_tokens contribute
        to the statistics.

        report_progress_fn is called after every generation with the completed
        fraction of (warmup_iterations + iterations).

        Raises the exception built by Bench.not_enough_tokens(output_tokens)
        when no measured iteration produced enough tokens.
        """

        def stdev_or_none(samples):
            # statistics.stdev raises StatisticsError with fewer than 2 samples
            try:
                return statistics.stdev(samples)
            except StatisticsError:
                return None

        llm: ModelAdapter = state.model
        tokenizer: TokenizerAdapter = state.tokenizer

        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        self.input_ids_len_list.append(len(input_ids))

        ttft_samples = []
        tps_samples = []
        total_runs = warmup_iterations + iterations

        # Warmup runs: response lengths are recorded, timings are not
        for completed in range(1, warmup_iterations + 1):
            llm.generate(input_ids, max_new_tokens=output_tokens)
            self.tokens_out_len_list.append(llm.response_tokens)
            report_progress_fn(completed / total_runs)

        # Measured runs: request exactly output_tokens new tokens
        for completed in range(1, iterations + 1):
            llm.generate(
                input_ids,
                max_new_tokens=output_tokens,
                min_new_tokens=output_tokens,
            )
            report_progress_fn((warmup_iterations + completed) / total_runs)

            self.tokens_out_len_list.append(llm.response_tokens)

            # Skip the timings of any run that came up short on tokens
            if llm.response_tokens < output_tokens:
                continue
            ttft_samples.append(llm.time_to_first_token)
            tps_samples.append(llm.tokens_per_second)

        if not (ttft_samples and tps_samples):
            raise Bench.not_enough_tokens(output_tokens)

        ttft_mean = statistics.mean(ttft_samples)
        self.mean_time_to_first_token_list.append(ttft_mean)

        # Prefill throughput = prompt tokens divided by mean time to first token
        self.prefill_tokens_per_second_list.append(len(input_ids) / ttft_mean)

        self.token_generation_tokens_per_second_list.append(
            statistics.mean(tps_samples)
        )

        # Standard deviations are recorded as None when there are < 2 samples
        self.std_dev_time_to_first_token_list.append(stdev_or_none(ttft_samples))
        self.std_dev_token_generation_tokens_per_second_list.append(
            stdev_or_none(tps_samples)
        )

        if self.save_max_memory_used:
            # NOTE(review): psutil documents peak_wset as a Windows-only field;
            # presumably save_max_memory_used is only enabled there - confirm
            # against Bench.
            peak_bytes = psutil.Process().memory_info().peak_wset
            self.max_memory_used_gb_list.append(peak_bytes / 1024**3)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# This file was originally licensed under Apache 2.0. It has been modified.
|
|
120
|
+
# Modifications Copyright (c) 2025 AMD
|