lemonade-sdk 8.0.1__tar.gz → 8.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (77) hide show
  1. {lemonade_sdk-8.0.1/src/lemonade_sdk.egg-info → lemonade_sdk-8.0.3}/PKG-INFO +29 -25
  2. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/setup.py +26 -26
  3. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/cli.py +2 -2
  4. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/profilers/profiler.py +4 -1
  5. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/oga/load.py +2 -8
  6. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/prompt.py +21 -6
  7. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/report/table.py +89 -0
  8. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/llamacpp.py +18 -7
  9. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/static/webapp.html +36 -3
  10. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/tray.py +44 -15
  11. lemonade_sdk-8.0.3/src/lemonade/version.py +1 -0
  12. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3/src/lemonade_sdk.egg-info}/PKG-INFO +29 -25
  13. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_sdk.egg-info/requires.txt +23 -16
  14. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_server/cli.py +14 -12
  15. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_server/model_manager.py +111 -34
  16. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_server/pydantic_models.py +1 -0
  17. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_server/server_models.json +8 -0
  18. lemonade_sdk-8.0.1/src/lemonade/version.py +0 -1
  19. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/LICENSE +0 -0
  20. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/NOTICE.md +0 -0
  21. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/README.md +0 -0
  22. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/pyproject.toml +0 -0
  23. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/setup.cfg +0 -0
  24. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/__init__.py +0 -0
  25. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/api.py +0 -0
  26. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/cache.py +0 -0
  27. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/__init__.py +0 -0
  28. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/build.py +0 -0
  29. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/cli_helpers.py +0 -0
  30. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/exceptions.py +0 -0
  31. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/filesystem.py +0 -0
  32. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/network.py +0 -0
  33. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/printing.py +0 -0
  34. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/status.py +0 -0
  35. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/system_info.py +0 -0
  36. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/common/test_helpers.py +0 -0
  37. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/profilers/__init__.py +0 -0
  38. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/profilers/memory_tracker.py +0 -0
  39. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/sequence.py +0 -0
  40. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/state.py +0 -0
  41. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/__init__.py +0 -0
  42. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/accuracy.py +0 -0
  43. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/adapter.py +0 -0
  44. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/bench.py +0 -0
  45. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/huggingface/bench.py +0 -0
  46. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/huggingface/load.py +0 -0
  47. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/huggingface/utils.py +0 -0
  48. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/humaneval.py +0 -0
  49. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/llamacpp/bench.py +0 -0
  50. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/llamacpp/load.py +0 -0
  51. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/management_tools.py +0 -0
  52. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/mmlu.py +0 -0
  53. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/oga/__init__.py +0 -0
  54. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/oga/bench.py +0 -0
  55. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/oga/utils.py +0 -0
  56. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/perplexity.py +0 -0
  57. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/quark/__init__.py +0 -0
  58. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/quark/quark_load.py +0 -0
  59. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/quark/quark_quantize.py +0 -0
  60. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/report/__init__.py +0 -0
  61. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/report/llm_report.py +0 -0
  62. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/__init__.py +0 -0
  63. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/serve.py +0 -0
  64. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/static/favicon.ico +0 -0
  65. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/static/styles.css +0 -0
  66. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/tool_calls.py +0 -0
  67. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/utils/port.py +0 -0
  68. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/utils/system_tray.py +0 -0
  69. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/utils/thread.py +0 -0
  70. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/server/webapp.py +0 -0
  71. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade/tools/tool.py +0 -0
  72. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_install/__init__.py +0 -0
  73. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_install/install.py +0 -0
  74. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_sdk.egg-info/SOURCES.txt +0 -0
  75. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_sdk.egg-info/dependency_links.txt +0 -0
  76. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_sdk.egg-info/entry_points.txt +0 -0
  77. {lemonade_sdk-8.0.1 → lemonade_sdk-8.0.3}/src/lemonade_sdk.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.0.1
3
+ Version: 8.0.3
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -26,45 +26,49 @@ Requires-Dist: openai>=1.81.0
26
26
  Requires-Dist: transformers<=4.51.3
27
27
  Requires-Dist: jinja2
28
28
  Requires-Dist: tabulate
29
- Requires-Dist: huggingface-hub==0.30.2
29
+ Requires-Dist: sentencepiece
30
+ Requires-Dist: huggingface-hub==0.33.0
31
+ Provides-Extra: oga-hybrid
32
+ Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
33
+ Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
34
+ Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
35
+ Provides-Extra: oga-cpu
36
+ Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
37
+ Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
38
+ Provides-Extra: dev
39
+ Requires-Dist: torch>=2.6.0; extra == "dev"
40
+ Requires-Dist: accelerate; extra == "dev"
41
+ Requires-Dist: datasets; extra == "dev"
42
+ Requires-Dist: pandas>=1.5.3; extra == "dev"
43
+ Requires-Dist: matplotlib; extra == "dev"
44
+ Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
45
+ Requires-Dist: lm-eval[api]; extra == "dev"
30
46
  Provides-Extra: oga-hybrid-minimal
31
- Requires-Dist: onnx==1.16.1; extra == "oga-hybrid-minimal"
32
- Requires-Dist: numpy==1.26.4; extra == "oga-hybrid-minimal"
33
- Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid-minimal"
47
+ Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
34
48
  Provides-Extra: oga-cpu-minimal
35
- Requires-Dist: onnxruntime-genai==0.6.0; extra == "oga-cpu-minimal"
36
- Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "oga-cpu-minimal"
49
+ Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
37
50
  Provides-Extra: llm
38
- Requires-Dist: torch>=2.6.0; extra == "llm"
39
- Requires-Dist: accelerate; extra == "llm"
40
- Requires-Dist: sentencepiece; extra == "llm"
41
- Requires-Dist: datasets; extra == "llm"
42
- Requires-Dist: pandas>=1.5.3; extra == "llm"
43
- Requires-Dist: matplotlib; extra == "llm"
44
- Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
45
- Requires-Dist: lm-eval[api]; extra == "llm"
51
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm"
46
52
  Provides-Extra: llm-oga-cpu
47
- Requires-Dist: lemonade-sdk[oga-cpu-minimal]; extra == "llm-oga-cpu"
48
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
53
+ Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
49
54
  Provides-Extra: llm-oga-igpu
50
55
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
51
56
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
52
57
  Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
53
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
58
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-igpu"
54
59
  Provides-Extra: llm-oga-cuda
55
- Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
56
- Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
57
- Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
58
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
60
+ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
61
+ Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
62
+ Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
63
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
59
64
  Provides-Extra: llm-oga-npu
60
65
  Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
61
66
  Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
62
67
  Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
63
68
  Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
64
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-npu"
69
+ Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
65
70
  Provides-Extra: llm-oga-hybrid
66
- Requires-Dist: lemonade-sdk[oga-hybrid-minimal]; extra == "llm-oga-hybrid"
67
- Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-hybrid"
71
+ Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
68
72
  Provides-Extra: llm-oga-unified
69
73
  Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
70
74
  Dynamic: author-email
@@ -48,29 +48,30 @@ setup(
48
48
  "transformers<=4.51.3",
49
49
  "jinja2",
50
50
  "tabulate",
51
- # huggingface-hub==0.31.0 introduces a new transfer protocol that was causing us issues
52
- "huggingface-hub==0.30.2",
51
+ "sentencepiece",
52
+ "huggingface-hub==0.33.0",
53
53
  ],
54
54
  extras_require={
55
- # The -minimal extras are meant to deploy specific backends into end-user
55
+ # The non-dev extras are meant to deploy specific backends into end-user
56
56
  # applications, without including developer-focused tools
57
- "oga-hybrid-minimal": [
57
+ "oga-hybrid": [
58
58
  # Note: `lemonade-install --ryzenai hybrid` is necessary
59
59
  # to complete installation
60
60
  "onnx==1.16.1",
61
61
  "numpy==1.26.4",
62
62
  "protobuf>=6.30.1",
63
63
  ],
64
- "oga-cpu-minimal": [
65
- "onnxruntime-genai==0.6.0",
66
- "onnxruntime >=1.10.1,<1.22.0",
64
+ "oga-cpu": [
65
+ "onnxruntime-genai==0.8.2",
66
+ "onnxruntime >=1.22.0",
67
67
  ],
68
- "llm": [
68
+ # Developer-focused tools for benchmarking, accuracy testing, and
69
+ # model preparation (ONNX export, quantization, device-specific optimization, etc.)
70
+ "dev": [
69
71
  # Minimal dependencies for developers to use all features of
70
72
  # Lemonade SDK, including building and optimizing models
71
73
  "torch>=2.6.0",
72
74
  "accelerate",
73
- "sentencepiece",
74
75
  "datasets",
75
76
  "pandas>=1.5.3",
76
77
  "matplotlib",
@@ -79,36 +80,35 @@ setup(
79
80
  "human-eval-windows==1.0.4",
80
81
  "lm-eval[api]",
81
82
  ],
82
- "llm-oga-cpu": [
83
- "lemonade-sdk[oga-cpu-minimal]",
84
- "lemonade-sdk[llm]",
85
- ],
83
+ # Keep backwards compatibility for old extras names
84
+ "oga-hybrid-minimal": ["lemonade-sdk[oga-hybrid]"],
85
+ "oga-cpu-minimal": ["lemonade-sdk[oga-cpu]"],
86
+ "llm": ["lemonade-sdk[dev]"],
87
+ "llm-oga-cpu": ["lemonade-sdk[dev,oga-cpu]"],
88
+ # The following extras are deprecated and/or not commonly used
86
89
  "llm-oga-igpu": [
87
90
  "onnxruntime-genai-directml==0.6.0",
88
91
  "onnxruntime-directml>=1.19.0,<1.22.0",
89
92
  "transformers<4.45.0",
90
- "lemonade-sdk[llm]",
93
+ "lemonade-sdk[dev]",
91
94
  ],
92
95
  "llm-oga-cuda": [
93
- "onnxruntime-genai-cuda==0.6.0",
94
- "onnxruntime-gpu >=1.19.1,<1.22.0",
95
- "transformers<4.45.0",
96
- "lemonade-sdk[llm]",
96
+ "onnxruntime-genai-cuda==0.8.2",
97
+ "onnxruntime-gpu >=1.22.0",
98
+ "transformers<=4.51.3",
99
+ "lemonade-sdk[dev]",
97
100
  ],
98
101
  "llm-oga-npu": [
99
102
  "onnx==1.16.0",
103
+ # NPU requires specific onnxruntime version for Ryzen AI compatibility
104
+ # This may conflict with other OGA extras that require >=1.22.0
100
105
  "onnxruntime==1.18.0",
101
106
  "numpy==1.26.4",
102
107
  "protobuf>=6.30.1",
103
- "lemonade-sdk[llm]",
104
- ],
105
- "llm-oga-hybrid": [
106
- "lemonade-sdk[oga-hybrid-minimal]",
107
- "lemonade-sdk[llm]",
108
- ],
109
- "llm-oga-unified": [
110
- "lemonade-sdk[llm-oga-hybrid]",
108
+ "lemonade-sdk[dev]",
111
109
  ],
110
+ "llm-oga-hybrid": ["lemonade-sdk[dev,oga-hybrid]"],
111
+ "llm-oga-unified": ["lemonade-sdk[llm-oga-hybrid]"],
112
112
  },
113
113
  classifiers=[],
114
114
  entry_points={
@@ -90,9 +90,9 @@ https://github.com/lemonade-sdk/lemonade/blob/main/docs/README.md""",
90
90
  )
91
91
 
92
92
  profiler_instances = [
93
- profiler(global_args[profiler.unique_name])
93
+ profiler(global_args[profiler.unique_name.replace("-", "_")])
94
94
  for profiler in profilers
95
- if global_args.get(profiler.unique_name, None) is not None
95
+ if global_args.get(profiler.unique_name.replace("-", "_"), None) is not None
96
96
  ]
97
97
 
98
98
  if len(evaluation_tools) > 0:
@@ -48,7 +48,10 @@ class Profiler(abc.ABC):
48
48
  This method is called so that the profiler can create its output files.
49
49
  The state is passed so that build info can be gathered and stats can be written.
50
50
  The timestamp can be used for filename in current working directory.
51
- The start times contain a list of tools and start times.
51
+ The start times parameter is a dict with the keys being the tool names and
52
+ the values being the time the tool started. There is an initial "warmup" key
53
+ that has a start time before the first tool and a "cool down" key that contains the
54
+ time when the last tool ended.
52
55
  """
53
56
 
54
57
 
@@ -1,12 +1,6 @@
1
1
  # onnxruntime_genai is not lint-friendly yet and PyLint can't
2
2
  # find any of the class methods
3
3
  # pylint: disable=no-member
4
- #
5
- # Model builder constraints:
6
- # 11/10/24 Need transformers <4.45.0 OR onnxruntime-genai 0.5.0 (which must be built from source)
7
- # (transformers v4.45 changes the format of the tokenizer.json file which will be supported in
8
- # onnxruntime-genai 0.5)
9
- #
10
4
 
11
5
  import argparse
12
6
  import os
@@ -51,8 +45,8 @@ def import_error_heler(e: Exception):
51
45
  """
52
46
  raise ImportError(
53
47
  f"{e}\n Please install lemonade-sdk with "
54
- "one of the llm-oga extras, for example:\n"
55
- "pip install lemonade-sdk[llm-oga-cpu]\n"
48
+ "one of the oga extras, for example:\n"
49
+ "pip install lemonade-sdk[dev,oga-cpu]\n"
56
50
  "See https://lemonade_server.ai/install_options.html for details"
57
51
  )
58
52
 
@@ -176,12 +176,21 @@ class LLMPrompt(Tool):
176
176
 
177
177
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
178
178
  if isinstance(input_ids, (list, str)):
179
- # OGA models return a list of tokens
179
+ # OGA models return a list of tokens (older versions)
180
180
  # Our llama.cpp adapter returns a string
181
181
  len_tokens_in = len(input_ids)
182
- else:
182
+ elif hasattr(input_ids, "shape"):
183
183
  # HF models return a 2-D tensor
184
- len_tokens_in = input_ids.shape[1]
184
+ # OGA models with newer versions may return numpy arrays
185
+ if len(input_ids.shape) == 1:
186
+ # 1-D array from newer OGA versions
187
+ len_tokens_in = len(input_ids)
188
+ else:
189
+ # 2-D tensor from HF models
190
+ len_tokens_in = input_ids.shape[1]
191
+ else:
192
+ # Fallback: try to get length directly
193
+ len_tokens_in = len(input_ids)
185
194
 
186
195
  len_tokens_out = []
187
196
  response_texts = []
@@ -202,9 +211,15 @@ class LLMPrompt(Tool):
202
211
  random_seed += 1
203
212
 
204
213
  # Flatten the input and response
205
- input_ids_array = (
206
- input_ids if isinstance(input_ids, (list, str)) else input_ids[0]
207
- )
214
+ if isinstance(input_ids, (list, str)):
215
+ input_ids_array = input_ids
216
+ elif hasattr(input_ids, "shape") and len(input_ids.shape) == 1:
217
+ # 1-D array from newer OGA versions - already flat
218
+ input_ids_array = input_ids
219
+ else:
220
+ # 2-D tensor from HF models - take first row
221
+ input_ids_array = input_ids[0]
222
+
208
223
  response_array = response if isinstance(response, str) else response[0]
209
224
 
210
225
  # Separate the prompt from the response
@@ -7,6 +7,7 @@ from tabulate import tabulate
7
7
  import lemonade.common.build as build
8
8
  import lemonade.common.filesystem as fs
9
9
  from lemonade.cache import Keys
10
+ from lemonade.tools.accuracy import LMEvalHarness
10
11
  from lemonade.tools.huggingface.bench import HuggingfaceBench
11
12
  from lemonade.tools.llamacpp.bench import LlamaCppBench
12
13
  from lemonade.tools.mmlu import AccuracyMMLU
@@ -73,6 +74,7 @@ class SimpleStat(TableColumn):
73
74
  align="center",
74
75
  omit_if_lean=False,
75
76
  wrap=None,
77
+ stat_fn=None,
76
78
  ):
77
79
  self.column_header = column_header
78
80
  self.stat = stat
@@ -80,6 +82,7 @@ class SimpleStat(TableColumn):
80
82
  self.align = align
81
83
  self.omit_if_lean = omit_if_lean
82
84
  self.wrap = wrap or self.default_wrap
85
+ self.stat_fn = stat_fn
83
86
 
84
87
  def get_str(self, build_stats, lean=False):
85
88
  if lean and self.omit_if_lean:
@@ -87,6 +90,8 @@ class SimpleStat(TableColumn):
87
90
  data = build_stats.get(self.stat, None)
88
91
  if data is None:
89
92
  return ""
93
+ if self.stat_fn:
94
+ data = self.stat_fn(data)
90
95
  cell_str = "\n".join(
91
96
  [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
92
97
  )
@@ -232,6 +237,47 @@ class AdditionalStat(TableColumn):
232
237
  return "\n".join(cell_entry)
233
238
 
234
239
 
240
+ class DictListStat(TableColumn):
241
+ """
242
+ A statistic that is a list of dicts and values from a given list of keys will be
243
+ pulled out of each dict and placed in the cell
244
+ """
245
+
246
+ def __init__(
247
+ self,
248
+ column_header,
249
+ statistic_name,
250
+ key_format_list,
251
+ align="center",
252
+ omit_if_lean=False,
253
+ wrap=None,
254
+ ):
255
+ self.column_header = column_header
256
+ self.statistic_name = statistic_name
257
+ self.key_format_list = key_format_list
258
+ self.align = align
259
+ self.omit_if_lean = omit_if_lean
260
+ self.wrap = wrap or self.default_wrap
261
+
262
+ def get_str(self, build_stats, lean=False):
263
+ if lean and self.omit_if_lean:
264
+ return None
265
+ stat = build_stats.get(self.statistic_name, None)
266
+ if not stat:
267
+ return ""
268
+ cell_entry = []
269
+ for stat_dict in stat:
270
+ line = [
271
+ format_str.format(stat_dict[key])
272
+ for key, format_str in self.key_format_list
273
+ ]
274
+ cell_entry.append(" ".join(line))
275
+ return "\n".join(cell_entry)
276
+
277
+ def get_keys(self):
278
+ return [self.statistic_name]
279
+
280
+
235
281
  ################################################################################
236
282
  # ABSTRACT BASE CLASS FOR DEFINING A TABLE
237
283
  ################################################################################
@@ -349,6 +395,28 @@ class Table(ABC):
349
395
  headers.append(column.column_header)
350
396
  col_align += (column.align,)
351
397
 
398
+ # Stat column headers
399
+ stat_columns = self.table_descriptor.get("stat_columns", [])
400
+ stat_columns_include = []
401
+ for column in stat_columns:
402
+ # Check to see that at least one build has data for the column
403
+ keep_column = False
404
+ if not (self.lean and column.omit_if_lean):
405
+ keys = column.get_keys()
406
+ for build_stats in self.all_stats:
407
+ found = [(key in build_stats) for key in keys]
408
+ if any(found):
409
+ keep_column = True
410
+ headers.append(column.column_header)
411
+ col_align += (column.align,)
412
+ break
413
+ stat_columns_include.append(keep_column)
414
+ stat_columns = [
415
+ column
416
+ for column, include in zip(stat_columns, stat_columns_include)
417
+ if include
418
+ ]
419
+
352
420
  # Final headers
353
421
  last_columns = self.table_descriptor.get("last_columns", [])
354
422
  for column in last_columns:
@@ -385,6 +453,12 @@ class Table(ABC):
385
453
  if entry_str is not None:
386
454
  row.append(entry_str)
387
455
 
456
+ # Per stat columns
457
+ for entry in stat_columns:
458
+ entry_str = entry.get_str(build_stats, self.lean)
459
+ if entry_str is not None:
460
+ row.append(entry_str)
461
+
388
462
  # Final columns
389
463
  for entry in last_columns:
390
464
  entry_str = entry.get_str(build_stats, self.lean)
@@ -513,6 +587,12 @@ class LemonadePerfTable(Table):
513
587
  Keys.STD_DEV_TOKENS_PER_SECOND,
514
588
  ".2f",
515
589
  ),
590
+ SimpleStat(
591
+ _wrap("Total Generated Tokens", 9),
592
+ Keys.RESPONSE_TOKENS,
593
+ "d",
594
+ stat_fn=sum,
595
+ ),
516
596
  SimpleStat(
517
597
  _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"
518
598
  ),
@@ -527,7 +607,16 @@ class LemonadePerfTable(Table):
527
607
  ".2f",
528
608
  )
529
609
  ],
610
+ LMEvalHarness: [
611
+ AdditionalStat(
612
+ "EleutherAI\nLM Evaluation",
613
+ "^lm_eval_",
614
+ "^lm_eval_",
615
+ ".1f",
616
+ )
617
+ ],
530
618
  },
619
+ "stat_columns": [],
531
620
  "last_columns": [
532
621
  SimpleStat(
533
622
  "System Info",
@@ -210,15 +210,20 @@ def _log_subprocess_output(
210
210
  """
211
211
 
212
212
  if process.stdout:
213
- for line in iter(process.stdout.readline, ""):
214
- if line:
215
- line_stripped = line.strip()
216
- logging.debug("%s: %s", prefix, line_stripped)
213
+ try:
214
+ for line in iter(process.stdout.readline, ""):
215
+ if line:
216
+ line_stripped = line.strip()
217
+ logging.debug("%s: %s", prefix, line_stripped)
217
218
 
218
- telemetry.parse_telemetry_line(line_stripped)
219
+ telemetry.parse_telemetry_line(line_stripped)
219
220
 
220
- if process.poll() is not None:
221
- break
221
+ if process.poll() is not None:
222
+ break
223
+ except UnicodeDecodeError as e:
224
+ logging.debug("Unicode decode error reading subprocess output: %s", str(e))
225
+ except Exception as e: # pylint: disable=broad-exception-caught
226
+ logging.error("Unexpected error reading subprocess output: %s", str(e))
222
227
 
223
228
 
224
229
  def _wait_for_load(llama_server_process: subprocess.Popen, port: int):
@@ -287,6 +292,8 @@ def _launch_llama_subprocess(
287
292
  stdout=subprocess.PIPE,
288
293
  stderr=subprocess.STDOUT,
289
294
  text=True,
295
+ encoding="utf-8",
296
+ errors="replace",
290
297
  bufsize=1,
291
298
  env=env,
292
299
  )
@@ -383,6 +390,10 @@ def server_load(model_config: PullConfig, telemetry: LlamaTelemetry):
383
390
  f"Loading {model_config.model_name} on GPU didn't work, re-attempting on CPU"
384
391
  )
385
392
 
393
+ if os.environ.get("LEMONADE_LLAMACPP_NO_FALLBACK"):
394
+ # Used for testing, when the test should fail if GPU didn't work
395
+ raise Exception("llamacpp GPU loading failed")
396
+
386
397
  llama_server_process = _launch_llama_subprocess(
387
398
  snapshot_files, use_gpu=False, telemetry=telemetry
388
399
  )
@@ -110,20 +110,53 @@
110
110
  </footer>
111
111
  <script src="https://cdn.jsdelivr.net/npm/openai@4.21.0/dist/openai.min.js"></script>
112
112
  <script> // Tab switching logic
113
- function showTab(tab) {
113
+ function showTab(tab, updateHash = true) {
114
114
  document.getElementById('tab-chat').classList.remove('active');
115
115
  document.getElementById('tab-models').classList.remove('active');
116
116
  document.getElementById('content-chat').classList.remove('active');
117
117
  document.getElementById('content-models').classList.remove('active');
118
118
  if (tab === 'chat') {
119
119
  document.getElementById('tab-chat').classList.add('active');
120
- document.getElementById('content-chat').classList.add('active');
120
+ document.getElementById('content-chat').classList.add('active');
121
+ if (updateHash) {
122
+ window.location.hash = 'llm-chat';
123
+ }
121
124
  } else {
122
125
  document.getElementById('tab-models').classList.add('active');
123
- document.getElementById('content-models').classList.add('active');
126
+ document.getElementById('content-models').classList.add('active');
127
+ if (updateHash) {
128
+ window.location.hash = 'model-management';
129
+ }
124
130
  }
125
131
  }
126
132
 
133
+ // Handle hash changes for anchor navigation
134
+ function handleHashChange() {
135
+ const hash = window.location.hash.slice(1); // Remove the # symbol
136
+ if (hash === 'llm-chat') {
137
+ showTab('chat', false);
138
+ } else if (hash === 'model-management') {
139
+ showTab('models', false);
140
+ }
141
+ }
142
+
143
+ // Initialize tab based on URL hash on page load
144
+ function initializeTabFromHash() {
145
+ const hash = window.location.hash.slice(1);
146
+ if (hash === 'llm-chat') {
147
+ showTab('chat', false);
148
+ } else if (hash === 'model-management') {
149
+ showTab('models', false);
150
+ }
151
+ // If no hash or unrecognized hash, keep default (chat tab is already active)
152
+ }
153
+
154
+ // Listen for hash changes
155
+ window.addEventListener('hashchange', handleHashChange);
156
+
157
+ // Initialize on page load
158
+ document.addEventListener('DOMContentLoaded', initializeTabFromHash);
159
+
127
160
  // Toggle Add Model form
128
161
  function toggleAddModelForm() {
129
162
  const form = document.querySelector('.model-mgmt-register-form');
@@ -197,11 +197,17 @@ class LemonadeTray(SystemTray):
197
197
  """
198
198
  webbrowser.open("https://lemonade-server.ai/docs/")
199
199
 
200
+ def open_llm_chat(self, _, __):
201
+ """
202
+ Open the LLM chat in the default web browser.
203
+ """
204
+ webbrowser.open(f"http://localhost:{self.port}/#llm-chat")
205
+
200
206
  def open_model_manager(self, _, __):
201
207
  """
202
208
  Open the model manager in the default web browser.
203
209
  """
204
- webbrowser.open(f"http://localhost:{self.port}/")
210
+ webbrowser.open(f"http://localhost:{self.port}/#model-management")
205
211
 
206
212
  def check_server_state(self):
207
213
  """
@@ -266,7 +272,7 @@ class LemonadeTray(SystemTray):
266
272
  self.logger.error(f"Error changing port: {str(e)}")
267
273
  self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
268
274
 
269
- def upgrade_to_latest(self, icon, item):
275
+ def upgrade_to_latest(self, _, __):
270
276
  """
271
277
  Download and launch the Lemonade Server installer
272
278
  """
@@ -281,21 +287,34 @@ class LemonadeTray(SystemTray):
281
287
  installer_path = os.path.join(
282
288
  tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
283
289
  )
290
+ if os.path.exists(installer_path):
291
+ os.remove(installer_path)
284
292
 
285
293
  # Download the installer
286
294
  response = requests.get(self.latest_version_url, stream=True)
287
295
  response.raise_for_status()
288
296
 
289
- # Save the installer to disk
297
+ # Save the installer to disk and force write to disk
290
298
  with open(installer_path, "wb") as f:
291
299
  for chunk in response.iter_content(chunk_size=8192):
292
300
  f.write(chunk)
301
+ f.flush()
302
+ os.fsync(f.fileno())
293
303
 
294
- # Launch the installer
295
- subprocess.Popen([installer_path], shell=True)
304
+ # Launch the installer as a completely detached process
305
+ # subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
306
+ # subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
307
+ # close_fds=True - Closes file descriptors to prevent inheritance
308
+ subprocess.Popen(
309
+ [installer_path],
310
+ creationflags=subprocess.DETACHED_PROCESS
311
+ | subprocess.CREATE_NEW_PROCESS_GROUP,
312
+ close_fds=True,
313
+ shell=True,
314
+ cwd=tempfile.gettempdir(),
315
+ )
296
316
 
297
- # Quit the application
298
- self.exit_app(icon, item)
317
+ # No need to quit the application, the installer will handle it
299
318
 
300
319
  def create_menu(self):
301
320
  """
@@ -326,16 +345,25 @@ class LemonadeTray(SystemTray):
326
345
 
327
346
  # Create menu items for all downloaded models
328
347
  model_menu_items = []
329
- for model_name, _ in self.downloaded_models.items():
330
- # Create a function that returns the lambda to properly capture the variables
331
- def create_handler(mod):
332
- return lambda icon, item: self.load_llm(icon, item, mod)
348
+ if not self.downloaded_models:
349
+ model_menu_items.append(
350
+ MenuItem(
351
+ "No models available: Use the Model Manager to pull models",
352
+ None,
353
+ enabled=False,
354
+ )
355
+ )
356
+ else:
357
+ for model_name, _ in self.downloaded_models.items():
358
+ # Create a function that returns the lambda to properly capture the variables
359
+ def create_handler(mod):
360
+ return lambda icon, item: self.load_llm(icon, item, mod)
333
361
 
334
- model_item = MenuItem(model_name, create_handler(model_name))
362
+ model_item = MenuItem(model_name, create_handler(model_name))
335
363
 
336
- # Set checked property instead of modifying the text
337
- model_item.checked = model_name == self.loaded_llm
338
- model_menu_items.append(model_item)
364
+ # Set checked property instead of modifying the text
365
+ model_item.checked = model_name == self.loaded_llm
366
+ model_menu_items.append(model_item)
339
367
 
340
368
  load_submenu = Menu(*model_menu_items)
341
369
 
@@ -378,6 +406,7 @@ class LemonadeTray(SystemTray):
378
406
  )
379
407
 
380
408
  items.append(MenuItem("Documentation", self.open_documentation))
409
+ items.append(MenuItem("LLM Chat", self.open_llm_chat))
381
410
  items.append(MenuItem("Model Manager", self.open_model_manager))
382
411
  items.append(MenuItem("Show Logs", self.show_logs))
383
412
  items.append(Menu.SEPARATOR)
@@ -0,0 +1 @@
1
+ __version__ = "8.0.3"