lemonade-sdk 9.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lemonade/__init__.py +5 -0
- lemonade/api.py +180 -0
- lemonade/cache.py +92 -0
- lemonade/cli.py +173 -0
- lemonade/common/__init__.py +0 -0
- lemonade/common/build.py +176 -0
- lemonade/common/cli_helpers.py +139 -0
- lemonade/common/exceptions.py +98 -0
- lemonade/common/filesystem.py +368 -0
- lemonade/common/inference_engines.py +408 -0
- lemonade/common/network.py +93 -0
- lemonade/common/printing.py +110 -0
- lemonade/common/status.py +471 -0
- lemonade/common/system_info.py +1411 -0
- lemonade/common/test_helpers.py +28 -0
- lemonade/profilers/__init__.py +1 -0
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/profilers/memory_tracker.py +259 -0
- lemonade/profilers/profiler.py +58 -0
- lemonade/sequence.py +363 -0
- lemonade/state.py +159 -0
- lemonade/tools/__init__.py +1 -0
- lemonade/tools/accuracy.py +432 -0
- lemonade/tools/adapter.py +114 -0
- lemonade/tools/bench.py +302 -0
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +305 -0
- lemonade/tools/huggingface/bench.py +187 -0
- lemonade/tools/huggingface/load.py +235 -0
- lemonade/tools/huggingface/utils.py +359 -0
- lemonade/tools/humaneval.py +264 -0
- lemonade/tools/llamacpp/bench.py +255 -0
- lemonade/tools/llamacpp/load.py +222 -0
- lemonade/tools/llamacpp/utils.py +1260 -0
- lemonade/tools/management_tools.py +319 -0
- lemonade/tools/mmlu.py +319 -0
- lemonade/tools/oga/__init__.py +0 -0
- lemonade/tools/oga/bench.py +120 -0
- lemonade/tools/oga/load.py +804 -0
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/oga/utils.py +462 -0
- lemonade/tools/perplexity.py +147 -0
- lemonade/tools/prompt.py +263 -0
- lemonade/tools/report/__init__.py +0 -0
- lemonade/tools/report/llm_report.py +203 -0
- lemonade/tools/report/table.py +899 -0
- lemonade/tools/server/__init__.py +0 -0
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +320 -0
- lemonade/tools/server/serve.py +2123 -0
- lemonade/tools/server/static/favicon.ico +0 -0
- lemonade/tools/server/static/index.html +279 -0
- lemonade/tools/server/static/js/chat.js +1059 -0
- lemonade/tools/server/static/js/model-settings.js +183 -0
- lemonade/tools/server/static/js/models.js +1395 -0
- lemonade/tools/server/static/js/shared.js +556 -0
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +2654 -0
- lemonade/tools/server/static/webapp.html +321 -0
- lemonade/tools/server/tool_calls.py +153 -0
- lemonade/tools/server/tray.py +664 -0
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/port.py +77 -0
- lemonade/tools/server/utils/thread.py +85 -0
- lemonade/tools/server/utils/windows_tray.py +408 -0
- lemonade/tools/server/webapp.py +34 -0
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/tools/tool.py +374 -0
- lemonade/version.py +1 -0
- lemonade_install/__init__.py +1 -0
- lemonade_install/install.py +239 -0
- lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
- lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
- lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
- lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
- lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
- lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
- lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
- lemonade_server/cli.py +805 -0
- lemonade_server/model_manager.py +758 -0
- lemonade_server/pydantic_models.py +159 -0
- lemonade_server/server_models.json +643 -0
- lemonade_server/settings.py +39 -0
lemonade/__init__.py
ADDED
lemonade/api.py
ADDED
@@ -0,0 +1,180 @@
+# pylint: disable=no-member
+
+from typing import Tuple, Dict
+from lemonade.state import State
+import lemonade.common.printing as printing
+import lemonade.cache as cache
+from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
+from lemonade.common.system_info import (
+    get_system_info_dict,
+    get_device_info_dict,
+    get_system_info as get_system_info_obj,
+)
+
+
+class NotSupported(Exception):
+    """
+    Indicates that a checkpoint/recipe pair are not supported
+    together at this time.
+    """
+
+    def __init__(self, msg):
+        super().__init__(msg)
+        printing.log_error(msg)
+
+
+def _raise_not_supported(recipe, checkpoint):
+    raise NotSupported(
+        f"Recipe {recipe} does not have support for checkpoint {checkpoint}"
+    )
+
+
+def _make_state(recipe, checkpoint) -> Dict:
+    return State(cache_dir=cache.DEFAULT_CACHE_DIR, build_name=f"{checkpoint}_{recipe}")
+
+
+def from_pretrained(
+    checkpoint: str,
+    recipe: str = "hf-cpu",
+    do_not_upgrade: bool = True,
+) -> Tuple[ModelAdapter, TokenizerAdapter]:
+    """
+    Load an LLM and the corresponding tokenizer using a lemonade recipe.
+
+    Args:
+        - checkpoint: huggingface checkpoint that defines the LLM
+        - recipe: defines the implementation and hardware used for the LLM
+        - do_not_upgrade: prioritize the local copy of the model, if available,
+            even if an upgraded copy is available on the server (note: only applies
+            for oga-* recipes)
+
+    Recipe choices:
+        - hf-cpu: Huggingface Transformers implementation for CPU with max-perf settings
+        - hf-dgpu: Huggingface Transformers implementation on dGPU (via device="cuda")
+        - oga-cpu: CPU implementation based on onnxruntime-genai
+        - oga-igpu: DirectML implementation for iGPU based on onnxruntime-genai-directml
+        - oga-hybrid: AMD Ryzen AI Hybrid implementation based on onnxruntime-genai
+
+    Returns:
+        - model: LLM instance with a generate() method that invokes the recipe
+        - tokenizer: tokenizer instance compatible with the model, which supports
+            the encode (call) and decode() methods.
+    """
+
+    if recipe == "hf-cpu":
+        # Huggingface Transformers recipe for CPU
+        # Huggingface supports all checkpoints, so there is nothing to check for
+
+        import torch
+        from lemonade.tools.huggingface.load import HuggingfaceLoad
+
+        state = _make_state(recipe, checkpoint)
+
+        state = HuggingfaceLoad().run(
+            state,
+            input=checkpoint,
+            dtype=torch.bfloat16,
+        )
+
+        return state.model, state.tokenizer
+
+    elif recipe == "hf-dgpu":
+        # Huggingface Transformers recipe for discrete GPU (Nvidia, Instinct, Radeon)
+
+        import torch
+        from lemonade.tools.huggingface.load import HuggingfaceLoad
+
+        state = _make_state(recipe, checkpoint)
+
+        state = HuggingfaceLoad().run(
+            state,
+            input=checkpoint,
+            dtype=torch.bfloat16,
+            device="cuda",
+        )
+
+        return state.model, state.tokenizer
+
+    elif recipe.startswith("oga-"):
+        import lemonade.tools.oga.load as oga
+
+        # Make sure the user chose a supported runtime, e.g., oga-cpu
+        user_backend = recipe.split("oga-")[1]
+        supported_backends = ["cpu", "igpu", "npu", "hybrid"]
+        supported_recipes = [f"oga-{backend}" for backend in supported_backends]
+        if recipe not in supported_recipes:
+            raise NotSupported(
+                "Selected OGA recipe is not supported. "
+                f"The supported OGA recipes are: {supported_recipes}"
+            )
+
+        backend_to_dtype = {
+            "cpu": "int4",
+            "igpu": "int4",
+            "hybrid": "int4",
+            "npu": "int4",
+        }
+
+        state = _make_state(recipe, checkpoint)
+
+        state = oga.OgaLoad().run(
+            state,
+            input=checkpoint,
+            device=user_backend,
+            dtype=backend_to_dtype[user_backend],
+            do_not_upgrade=do_not_upgrade,
+        )
+
+        return state.model, state.tokenizer
+
+    else:
+        _raise_not_supported(recipe, checkpoint)
+
+
+def get_system_info(verbose: bool = False) -> Dict:
+    """
+    Get comprehensive system information including hardware details and device information.
+
+    Returns:
+        dict: Complete system information including:
+            - Basic system info (OS, processor, memory, BIOS, etc.).
+            - Device information (CPU, AMD iGPU, AMD dGPU, NPU).
+            - Inference engine availability per device.
+            - Python package versions (verbose mode only).
+    """
+
+    # Get basic system info
+    info = get_system_info_dict()
+
+    # Add device information
+    info["Devices"] = get_device_info_dict()
+
+    # Filter out verbose-only information if not in verbose mode
+    if not verbose:
+        essential_keys = ["OS Version", "Processor", "Physical Memory", "Devices"]
+        info = {k: v for k, v in info.items() if k in essential_keys}
+    else:
+        # In verbose mode, add Python packages at the end
+        system_info_obj = get_system_info_obj()
+        info["Python Packages"] = system_info_obj.get_python_packages()
+
+    return info
+
+
+def get_device_info() -> Dict:
+    """
+    Get device information including CPU, AMD iGPU, AMD dGPU, and NPU details.
+
+    Returns:
+        dict: Device information including:
+            - cpu: CPU details with inference engine availability.
+            - amd_igpu: AMD integrated GPU information.
+            - amd_dgpu: List of AMD discrete GPU information.
+            - npu: NPU information.
+    """
+
+    return get_device_info_dict()
+
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD
lemonade/cache.py
ADDED
@@ -0,0 +1,92 @@
+import os
+from datetime import datetime, timezone
+
+# Allow an environment variable to override the default
+# location for the build cache
+if os.environ.get("LEMONADE_CACHE_DIR"):
+    DEFAULT_CACHE_DIR = os.path.expanduser(os.environ.get("LEMONADE_CACHE_DIR"))
+else:
+    DEFAULT_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "lemonade")
+
+
+def checkpoint_to_model_name(checkpoint_name: str) -> str:
+    """
+    Get the model's name by stripping the author's name from the checkpoint name
+    """
+
+    return checkpoint_name.split("/")[1]
+
+
+def get_timestamp() -> str:
+    """
+    Get a timestamp string in the format:
+    <year>y_<month>m_<day>d_<hour>h_<minute>m_<second>s
+    """
+    # Get the current time in GMT
+    current_time = datetime.now(timezone.utc)
+
+    # Format the timestamp string
+    timestamp = current_time.strftime("%Yy_%mm_%dd_%Hh_%Mm_%Ss")
+    return timestamp
+
+
+def build_name(input_name):
+    """
+    Name the lemonade build by concatenating these two factors:
+    1. Sanitize the input name (typically a model checkpoint name) by
+        replacing any `/` characters with `_` and ':' characters with '-'.
+    2. Timestamp to ensure that builds in the same cache will not
+        collide in the same build directory.
+
+    If the input_name is a local folder, then we don't know the
+    model checkpoint name, so we use "local_model"
+    """
+
+    if os.path.isdir(input_name):
+        # Input is a folder so no good way to determine a model name
+        input_name_sanitized = "local_model"
+    elif os.path.isfile(input_name):
+        # Use the filename without its extension
+        input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
+    else:
+        # Sanitize the input name
+        input_name_sanitized = input_name.replace("/", "_")
+        input_name_sanitized = input_name_sanitized.replace(":", "-")
+
+    # Get the formatted timestamp string
+    timestamp = get_timestamp()
+
+    return f"{input_name_sanitized}_{timestamp}"
+
+
+class Keys:
+    MODEL = "model"
+    PER_ITERATION_LATENCY = "per_iteration_latency"
+    MEAN_LATENCY = "mean_latency"
+    STD_DEV_LATENCY = "std_dev_latency"
+    TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
+    STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
+    SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
+    STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
+    PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
+    STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
+    CHECKPOINT = "checkpoint"
+    DTYPE = "dtype"
+    PROMPT = "prompt"
+    PROMPT_TOKENS = "prompt_tokens"
+    PROMPT_TEMPLATE = "prompt_template"
+    RESPONSE = "response"
+    RESPONSE_TOKENS = "response_tokens"
+    RESPONSE_LENGTHS_HISTOGRAM = "response_lengths_histogram"
+    CACHE_DIR = "cache_dir"
+    DEVICE = "device"
+    LOCAL_MODEL_FOLDER = "local_model_folder"
+    MEMORY_USAGE_PLOT = "memory_usage_plot"
+    MAX_MEMORY_USED_GB = "max_memory_used_GB"
+    MAX_MEMORY_USED_GBYTE = "max_memory_used_gbyte"
+    RYZEN_AI_VERSION_INFO = "ryzen_ai_version_info"
+    LLAMA_CLI_VERSION_INFO = "llama_cli_version_info"
+
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD
lemonade/cli.py
ADDED
@@ -0,0 +1,173 @@
+import os
+
+# pylint: disable=C0413
+# Prevent HF warnings from showing on every import
+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
+from lemonade.version import __version__ as version_number
+from lemonade.tools import FirstTool, NiceHelpFormatter
+from lemonade.profilers.memory_tracker import MemoryTracker
+import lemonade.common.filesystem as fs
+import lemonade.common.cli_helpers as cli
+from lemonade.sequence import Sequence
+from lemonade.tools.management_tools import Cache, Version, SystemInfo
+from lemonade.state import State
+
+
+def get_available_profilers(warn_missing=False):
+    """Get list of available profilers, with conditional imports for optional dependencies.
+
+    Args:
+        warn_missing: If True, print warnings for missing profilers. If False, fail silently.
+    """
+    profilers = [MemoryTracker]
+
+    try:
+        from lemonade.profilers.hwinfo_power import HWINFOPowerProfiler
+
+        profilers.append(HWINFOPowerProfiler)
+    except ImportError:
+        if warn_missing:
+            print(
+                "Warning: HWINFOPowerProfiler not available. "
+                "Install lemonade with dev extras: "
+                "pip install lemonade-sdk[dev]"
+            )
+    try:
+        from lemonade.profilers.agt_power import AGTPowerProfiler
+
+        profilers.append(AGTPowerProfiler)
+    except ImportError:
+        if warn_missing:
+            print(
+                "Warning: AGTPowerProfiler not available. "
+                "Install lemonade with dev extras: "
+                "pip install lemonade-sdk[dev]"
+            )
+
+    return profilers
+
+
+from lemonade.tools.huggingface.load import HuggingfaceLoad
+from lemonade.tools.huggingface.bench import HuggingfaceBench
+from lemonade.tools.oga.load import OgaLoad
+from lemonade.tools.oga.bench import OgaBench
+from lemonade.tools.llamacpp.bench import LlamaCppBench
+from lemonade.tools.llamacpp.load import LoadLlamaCpp
+
+import lemonade.cache as cache
+from lemonade.tools.mmlu import AccuracyMMLU
+from lemonade.tools.humaneval import AccuracyHumaneval
+from lemonade.tools.perplexity import AccuracyPerplexity
+from lemonade.tools.accuracy import LMEvalHarness
+from lemonade.tools.prompt import LLMPrompt
+from lemonade.tools.report.llm_report import LemonadeReport
+
+
+def main():
+
+    # List the available tools
+    tools = [
+        HuggingfaceLoad,
+        LoadLlamaCpp,
+        LlamaCppBench,
+        AccuracyMMLU,
+        AccuracyHumaneval,
+        AccuracyPerplexity,
+        LMEvalHarness,
+        LLMPrompt,
+        HuggingfaceBench,
+        OgaLoad,
+        OgaBench,
+        LemonadeReport,
+        # Inherited from lemonade
+        Cache,
+        Version,
+        SystemInfo,
+    ]
+
+    # List the available profilers
+    profilers = get_available_profilers()
+
+    # Define the argument parser
+    parser = cli.CustomArgumentParser(
+        description=f"""Tools for evaluating and deploying LLMs (v{version_number}).
+
+Read this to learn the command syntax:
+https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
+        formatter_class=NiceHelpFormatter,
+    )
+
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input that will be evaluated by the starting tool "
+        "(e.g., huggingface checkpoint)",
+    )
+
+    parser.add_argument(
+        "-d",
+        "--cache-dir",
+        help="Cache directory where tool results are "
+        f"stored (default: {cache.DEFAULT_CACHE_DIR})",
+        required=False,
+        default=cache.DEFAULT_CACHE_DIR,
+    )
+
+    for profiler in profilers:
+        profiler.add_arguments_to_parser(parser)
+
+    global_args, tool_instances, evaluation_tools = cli.parse_tools(
+        parser, tools, cli_name="lemonade"
+    )
+
+    # Check if any profilers are being requested
+    requested_profilers = [
+        profiler.unique_name.replace("-", "_")
+        for profiler in profilers
+        if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
+    ]
+
+    # If profilers are requested, get the full list with warnings for missing ones
+    if requested_profilers:
+        get_available_profilers(warn_missing=True)
+
+    profiler_instances = [
+        profiler(global_args[profiler.unique_name.replace("-", "_")])
+        for profiler in profilers
+        if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
+    ]
+
+    if len(evaluation_tools) > 0:
+        if not issubclass(evaluation_tools[0], FirstTool):
+            parser.error(
+                "The first tool in the sequence needs to be one "
+                "of the 'tools that can start a sequence.' Use "
+                "`lemonade -h` to see that list of tools."
+            )
+        # Run the evaluation tools as a build
+        sequence = Sequence(tools=tool_instances, profilers=profiler_instances)
+
+        # Forward the selected input to the first tool in the sequence
+        first_tool_args = next(iter(sequence.tools.values()))
+        first_tool_args.append("--input")
+        first_tool_args.append(global_args["input"])
+
+        state = State(
+            cache_dir=os.path.abspath(global_args["cache_dir"]),
+            build_name=cache.build_name(global_args["input"]),
+            sequence_info=sequence.info,
+        )
+        sequence.launch(state)
+    else:
+        # Run the management tools
+        for management_tool, argv in tool_instances.items():
+            # Support "~" in the cache_dir argument
+            parsed_cache_dir = os.path.expanduser(global_args[fs.Keys.CACHE_DIR])
+            management_tool.parse_and_run(parsed_cache_dir, argv)
+
+
+if __name__ == "__main__":
+    main()
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD
lemonade/common/__init__.py
ADDED
File without changes
lemonade/common/build.py
ADDED
@@ -0,0 +1,176 @@
+import os
+import logging
+import sys
+import traceback
+from typing import Dict
+import hashlib
+import psutil
+import yaml
+import lemonade.common.exceptions as exp
+
+state_file_name = "state.yaml"
+
+
+def load_yaml(file_path) -> Dict:
+    with open(file_path, "r", encoding="utf8") as stream:
+        try:
+            return yaml.load(stream, Loader=yaml.FullLoader)
+        except yaml.YAMLError as e:
+            raise exp.IOError(
+                f"Failed while trying to open {file_path}."
+                f"The exception that triggered this was:\n{e}"
+            )
+
+
+def builds_dir(cache_dir):
+    """
+    Each build stores stats, logs, and other files in a build directory.
+    All build directories are located at:
+        <cache_dir>/builds
+    """
+    return os.path.join(cache_dir, "builds")
+
+
+def output_dir(cache_dir, build_name):
+    """
+    Each build stores stats, logs, and other files in an output directory at:
+    All build directories are located at:
+        <builds_dir>/<build_name>
+    """
+    path = os.path.join(builds_dir(cache_dir), build_name)
+    return path
+
+
+def state_file(cache_dir, build_name):
+    path = os.path.join(output_dir(cache_dir, build_name), state_file_name)
+    return path
+
+
+class FunctionStatus:
+    """
+    Status values that are assigned to tools, builds, benchmarks, and other
+    functionality to help the user understand whether that function completed
+    successfully or not.
+    """
+
+    # SUCCESSFUL means the tool/build/benchmark completed successfully.
+    SUCCESSFUL = "successful"
+
+    # ERROR means the tool/build/benchmark failed and threw some error that
+    # was caught by lemonade. You should proceed by looking at the build
+    # logs to see what happened.
+
+    ERROR = "error"
+
+    # TIMEOUT means the tool/build/benchmark failed because it exceeded the timeout
+    # set for the lemonade command.
+    TIMEOUT = "timeout"
+
+    # KILLED means the build/benchmark failed because the system killed it. This can
+    # happen because of an out-of-memory (OOM), system shutdown, etc.
+    # You should proceed by re-running the build and keeping an eye on it to observe
+    # why it is being killed (e.g., watch the RAM utilization to diagnose an OOM).
+    KILLED = "killed"
+
+    # The NOT_STARTED status is applied to all tools/builds/benchmarks at startup.
+    # It will be replaced by one of the other status values if the tool/build/benchmark
+    # has a chance to start running.
+    # A value of NOT_STARTED in the report CSV indicates that the tool/build/benchmark
+    # never had a chance to start because lemonade exited before that functionality had
+    # a chance to start running.
+    NOT_STARTED = "not_started"
+
+    # INCOMPLETE indicates that a tool/build/benchmark started running and did not complete.
+    # Each tool, build, and benchmark are marked as INCOMPLETE when they start running.
+    # If you open the lemonade_stats.yaml file while the tool/build/benchmark
+    # is still running, the status will show as INCOMPLETE. If the tool/build/benchmark
+    # is killed without the chance to do any stats cleanup, the status will continue to
+    # show as INCOMPLETE in lemonade_stats.yaml.
+    # When the report CSV is created, any instance of an INCOMPLETE tool/build/benchmark
+    # status will be replaced by KILLED.
+    INCOMPLETE = "incomplete"
+
+
+# Create a unique ID from this run by hashing pid + process start time
+def unique_id():
+    pid = os.getpid()
+    p = psutil.Process(pid)
+    start_time = p.create_time()
+    return hashlib.sha256(f"{pid}{start_time}".encode()).hexdigest()
+
+
+class Logger:
+    """
+    Redirects stdout to file (and console if needed)
+    """
+
+    def __init__(
+        self,
+        initial_message: str,
+        log_path: str = None,
+    ):
+        self.debug = os.environ.get("LEMONADE_BUILD_DEBUG") == "True"
+        self.terminal = sys.stdout
+        self.terminal_err = sys.stderr
+        self.log_path = log_path
+
+        # Create the empty logfile
+        with open(log_path, "w", encoding="utf-8") as f:
+            f.write(f"{initial_message}\n")
+
+        # Disable any existing loggers so that we can capture all
+        # outputs to a logfile
+        self.root_logger = logging.getLogger()
+        self.handlers = [handler for handler in self.root_logger.handlers]
+        for handler in self.handlers:
+            self.root_logger.removeHandler(handler)
+
+        # Send any logger outputs to the logfile
+        if not self.debug:
+            self.file_handler = logging.FileHandler(filename=log_path)
+            self.file_handler.setLevel(logging.INFO)
+            formatter = logging.Formatter(
+                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+            )
+            self.file_handler.setFormatter(formatter)
+            self.root_logger.addHandler(self.file_handler)
+
+    def __enter__(self):
+        sys.stdout = self
+        sys.stderr = self
+
+    def __exit__(self, _exc_type, _exc_value, _exc_tb):
+        # Ensure we also capture the traceback as part of the logger when exceptions happen
+        if _exc_type:
+            traceback.print_exception(_exc_type, _exc_value, _exc_tb)
+
+        # Stop redirecting stdout/stderr
+        sys.stdout = self.terminal
+        sys.stderr = self.terminal_err
+
+        # Remove the logfile logging handler
+        if not self.debug:
+            self.file_handler.close()
+            self.root_logger.removeHandler(self.file_handler)
+
+        # Restore any pre-existing loggers
+        for handler in self.handlers:
+            self.root_logger.addHandler(handler)
+
+    def write(self, message):
+        if self.log_path is not None:
+            with open(self.log_path, "a", encoding="utf-8") as f:
+                f.write(message)
+        if self.debug or self.log_path is None:
+            self.terminal.write(message)
+            self.terminal.flush()
+            self.terminal_err.write(message)
+            self.terminal_err.flush()
+
+    def flush(self):
+        # needed for python 3 compatibility.
+        pass
+
+
+# This file was originally licensed under Apache 2.0. It has been modified.
+# Modifications Copyright (c) 2025 AMD