lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/cache.py +6 -1
- lemonade/cli.py +47 -5
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/status.py +4 -4
- lemonade/common/system_info.py +544 -1
- lemonade/profilers/agt_power.py +437 -0
- lemonade/profilers/hwinfo_power.py +429 -0
- lemonade/tools/accuracy.py +143 -48
- lemonade/tools/adapter.py +6 -1
- lemonade/tools/bench.py +26 -8
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +303 -0
- lemonade/tools/huggingface/bench.py +6 -1
- lemonade/tools/llamacpp/bench.py +146 -27
- lemonade/tools/llamacpp/load.py +30 -2
- lemonade/tools/llamacpp/utils.py +393 -33
- lemonade/tools/oga/bench.py +5 -26
- lemonade/tools/oga/load.py +60 -121
- lemonade/tools/oga/migration.py +403 -0
- lemonade/tools/report/table.py +76 -8
- lemonade/tools/server/flm.py +133 -0
- lemonade/tools/server/llamacpp.py +220 -553
- lemonade/tools/server/serve.py +684 -168
- lemonade/tools/server/static/js/chat.js +666 -342
- lemonade/tools/server/static/js/model-settings.js +24 -3
- lemonade/tools/server/static/js/models.js +597 -73
- lemonade/tools/server/static/js/shared.js +79 -14
- lemonade/tools/server/static/logs.html +191 -0
- lemonade/tools/server/static/styles.css +491 -66
- lemonade/tools/server/static/webapp.html +83 -31
- lemonade/tools/server/tray.py +158 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +559 -0
- lemonade/version.py +1 -1
- lemonade_install/install.py +54 -611
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
- lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
- lemonade_server/cli.py +145 -37
- lemonade_server/model_manager.py +521 -37
- lemonade_server/pydantic_models.py +28 -1
- lemonade_server/server_models.json +246 -92
- lemonade_server/settings.py +39 -39
- lemonade/tools/quark/__init__.py +0 -0
- lemonade/tools/quark/quark_load.py +0 -173
- lemonade/tools/quark/quark_quantize.py +0 -439
- lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
lemonade/cache.py
CHANGED
|
@@ -43,7 +43,11 @@ def build_name(input_name):
|
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
45
|
if os.path.isdir(input_name):
|
|
46
|
+
# Input is a folder so no good way to determine a model name
|
|
46
47
|
input_name_sanitized = "local_model"
|
|
48
|
+
elif os.path.isfile(input_name):
|
|
49
|
+
# Use the filename without its extension
|
|
50
|
+
input_name_sanitized = os.path.splitext(os.path.basename(input_name))[0]
|
|
47
51
|
else:
|
|
48
52
|
# Sanitize the input name
|
|
49
53
|
input_name_sanitized = input_name.replace("/", "_")
|
|
@@ -63,8 +67,9 @@ class Keys:
|
|
|
63
67
|
TOKEN_GENERATION_TOKENS_PER_SECOND = "token_generation_tokens_per_second"
|
|
64
68
|
STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
|
|
65
69
|
SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
|
|
66
|
-
PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
|
|
67
70
|
STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
|
|
71
|
+
PREFILL_TOKENS_PER_SECOND = "prefill_tokens_per_second"
|
|
72
|
+
STD_DEV_PREFILL_TOKENS_PER_SECOND = "std_dev_prefill_tokens_per_second"
|
|
68
73
|
CHECKPOINT = "checkpoint"
|
|
69
74
|
DTYPE = "dtype"
|
|
70
75
|
PROMPT = "prompt"
|
lemonade/cli.py
CHANGED
|
@@ -12,6 +12,41 @@ from lemonade.sequence import Sequence
|
|
|
12
12
|
from lemonade.tools.management_tools import Cache, Version, SystemInfo
|
|
13
13
|
from lemonade.state import State
|
|
14
14
|
|
|
15
|
+
|
|
16
|
+
def get_available_profilers(warn_missing=False):
|
|
17
|
+
"""Get list of available profilers, with conditional imports for optional dependencies.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
warn_missing: If True, print warnings for missing profilers. If False, fail silently.
|
|
21
|
+
"""
|
|
22
|
+
profilers = [MemoryTracker]
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from lemonade.profilers.hwinfo_power import HWINFOPowerProfiler
|
|
26
|
+
|
|
27
|
+
profilers.append(HWINFOPowerProfiler)
|
|
28
|
+
except ImportError:
|
|
29
|
+
if warn_missing:
|
|
30
|
+
print(
|
|
31
|
+
"Warning: HWINFOPowerProfiler not available. "
|
|
32
|
+
"Install lemonade with dev extras: "
|
|
33
|
+
"pip install lemonade-sdk[dev]"
|
|
34
|
+
)
|
|
35
|
+
try:
|
|
36
|
+
from lemonade.profilers.agt_power import AGTPowerProfiler
|
|
37
|
+
|
|
38
|
+
profilers.append(AGTPowerProfiler)
|
|
39
|
+
except ImportError:
|
|
40
|
+
if warn_missing:
|
|
41
|
+
print(
|
|
42
|
+
"Warning: AGTPowerProfiler not available. "
|
|
43
|
+
"Install lemonade with dev extras: "
|
|
44
|
+
"pip install lemonade-sdk[dev]"
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
return profilers
|
|
48
|
+
|
|
49
|
+
|
|
15
50
|
from lemonade.tools.huggingface.load import HuggingfaceLoad
|
|
16
51
|
from lemonade.tools.huggingface.bench import HuggingfaceBench
|
|
17
52
|
from lemonade.tools.oga.load import OgaLoad
|
|
@@ -25,8 +60,6 @@ from lemonade.tools.humaneval import AccuracyHumaneval
|
|
|
25
60
|
from lemonade.tools.perplexity import AccuracyPerplexity
|
|
26
61
|
from lemonade.tools.accuracy import LMEvalHarness
|
|
27
62
|
from lemonade.tools.prompt import LLMPrompt
|
|
28
|
-
from lemonade.tools.quark.quark_load import QuarkLoad
|
|
29
|
-
from lemonade.tools.quark.quark_quantize import QuarkQuantize
|
|
30
63
|
from lemonade.tools.report.llm_report import LemonadeReport
|
|
31
64
|
|
|
32
65
|
|
|
@@ -45,8 +78,6 @@ def main():
|
|
|
45
78
|
HuggingfaceBench,
|
|
46
79
|
OgaLoad,
|
|
47
80
|
OgaBench,
|
|
48
|
-
QuarkQuantize,
|
|
49
|
-
QuarkLoad,
|
|
50
81
|
LemonadeReport,
|
|
51
82
|
# Inherited from lemonade
|
|
52
83
|
Cache,
|
|
@@ -55,7 +86,7 @@ def main():
|
|
|
55
86
|
]
|
|
56
87
|
|
|
57
88
|
# List the available profilers
|
|
58
|
-
profilers = [MemoryTracker]
|
|
89
|
+
profilers = get_available_profilers()
|
|
59
90
|
|
|
60
91
|
# Define the argument parser
|
|
61
92
|
parser = cli.CustomArgumentParser(
|
|
@@ -89,6 +120,17 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
|
|
|
89
120
|
parser, tools, cli_name="lemonade"
|
|
90
121
|
)
|
|
91
122
|
|
|
123
|
+
# Check if any profilers are being requested
|
|
124
|
+
requested_profilers = [
|
|
125
|
+
profiler.unique_name.replace("-", "_")
|
|
126
|
+
for profiler in profilers
|
|
127
|
+
if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
# If profilers are requested, get the full list with warnings for missing ones
|
|
131
|
+
if requested_profilers:
|
|
132
|
+
get_available_profilers(warn_missing=True)
|
|
133
|
+
|
|
92
134
|
profiler_instances = [
|
|
93
135
|
profiler(global_args[profiler.unique_name.replace("-", "_")])
|
|
94
136
|
for profiler in profilers
|
lemonade/common/inference_engines.py
CHANGED
|
@@ -24,7 +24,7 @@ class InferenceEngineDetector:
|
|
|
24
24
|
Detect all available inference engines for a specific device type.
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
-
device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
|
|
27
|
+
device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
|
|
28
28
|
|
|
29
29
|
Returns:
|
|
30
30
|
dict: Engine availability information
|
|
@@ -223,17 +223,26 @@ class LlamaCppDetector(BaseEngineDetector):
|
|
|
223
223
|
"""
|
|
224
224
|
try:
|
|
225
225
|
|
|
226
|
-
if device_type not in ["cpu", "amd_igpu", "amd_dgpu"]:
|
|
226
|
+
if device_type not in ["cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu"]:
|
|
227
227
|
return None
|
|
228
228
|
|
|
229
229
|
# Check if the device is supported by the backend
|
|
230
230
|
if device_type == "cpu":
|
|
231
231
|
device_supported = True
|
|
232
|
-
elif device_type
|
|
232
|
+
elif device_type in ["amd_igpu", "amd_dgpu"]:
|
|
233
233
|
if backend == "vulkan":
|
|
234
234
|
device_supported = self._check_vulkan_support()
|
|
235
235
|
elif backend == "rocm":
|
|
236
236
|
device_supported = self._check_rocm_support(device_name.lower())
|
|
237
|
+
else:
|
|
238
|
+
device_supported = False
|
|
239
|
+
elif device_type == "nvidia_dgpu":
|
|
240
|
+
if backend == "vulkan":
|
|
241
|
+
device_supported = self._check_vulkan_support()
|
|
242
|
+
else:
|
|
243
|
+
device_supported = False
|
|
244
|
+
else:
|
|
245
|
+
device_supported = False
|
|
237
246
|
if not device_supported:
|
|
238
247
|
return {"available": False, "error": f"{backend} not available"}
|
|
239
248
|
|
|
@@ -390,7 +399,7 @@ def detect_inference_engines(device_type: str, device_name: str) -> Dict[str, Di
|
|
|
390
399
|
Helper function to detect inference engines for a device type.
|
|
391
400
|
|
|
392
401
|
Args:
|
|
393
|
-
device_type: "cpu", "amd_igpu", "amd_dgpu", or "npu"
|
|
402
|
+
device_type: "cpu", "amd_igpu", "amd_dgpu", "nvidia_dgpu", or "npu"
|
|
394
403
|
device_name: device name
|
|
395
404
|
|
|
396
405
|
Returns:
|
lemonade/common/status.py
CHANGED
|
@@ -112,10 +112,10 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
112
112
|
if print_file_name:
|
|
113
113
|
print(f"{self.script_name}{self.extension}:")
|
|
114
114
|
|
|
115
|
-
# Print invocation about the model (only applies to scripts, not ONNX files
|
|
115
|
+
# Print invocation about the model (only applies to scripts, not ONNX or GGUF files, nor
|
|
116
116
|
# LLMs, which have no extension)
|
|
117
117
|
if not (
|
|
118
|
-
self.extension == ".onnx"
|
|
118
|
+
self.extension in [".onnx", ".gguf"]
|
|
119
119
|
or self.extension == build.state_file_name
|
|
120
120
|
or self.extension == ""
|
|
121
121
|
):
|
|
@@ -138,7 +138,7 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
138
138
|
|
|
139
139
|
if self.depth == 0:
|
|
140
140
|
print(f"{self.indent}\tLocation:\t{self.file}", end="")
|
|
141
|
-
if self.extension == ".onnx":
|
|
141
|
+
if self.extension in [".onnx", ".gguf"]:
|
|
142
142
|
print()
|
|
143
143
|
else:
|
|
144
144
|
print(f", line {self.line}")
|
|
@@ -314,7 +314,7 @@ class UniqueInvocationInfo(BasicInfo):
|
|
|
314
314
|
Print information about a given model or submodel.
|
|
315
315
|
"""
|
|
316
316
|
|
|
317
|
-
if self.extension == ".onnx" or self.extension == "":
|
|
317
|
+
if self.extension in [".onnx", ".gguf"] or self.extension == "":
|
|
318
318
|
self.indent = "\t" * (2 * self.depth)
|
|
319
319
|
else:
|
|
320
320
|
self.indent = "\t" * (2 * self.depth + 1)
|