lemonade-sdk 9.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lemonade/__init__.py +5 -0
  2. lemonade/api.py +180 -0
  3. lemonade/cache.py +92 -0
  4. lemonade/cli.py +173 -0
  5. lemonade/common/__init__.py +0 -0
  6. lemonade/common/build.py +176 -0
  7. lemonade/common/cli_helpers.py +139 -0
  8. lemonade/common/exceptions.py +98 -0
  9. lemonade/common/filesystem.py +368 -0
  10. lemonade/common/inference_engines.py +408 -0
  11. lemonade/common/network.py +93 -0
  12. lemonade/common/printing.py +110 -0
  13. lemonade/common/status.py +471 -0
  14. lemonade/common/system_info.py +1411 -0
  15. lemonade/common/test_helpers.py +28 -0
  16. lemonade/profilers/__init__.py +1 -0
  17. lemonade/profilers/agt_power.py +437 -0
  18. lemonade/profilers/hwinfo_power.py +429 -0
  19. lemonade/profilers/memory_tracker.py +259 -0
  20. lemonade/profilers/profiler.py +58 -0
  21. lemonade/sequence.py +363 -0
  22. lemonade/state.py +159 -0
  23. lemonade/tools/__init__.py +1 -0
  24. lemonade/tools/accuracy.py +432 -0
  25. lemonade/tools/adapter.py +114 -0
  26. lemonade/tools/bench.py +302 -0
  27. lemonade/tools/flm/__init__.py +1 -0
  28. lemonade/tools/flm/utils.py +305 -0
  29. lemonade/tools/huggingface/bench.py +187 -0
  30. lemonade/tools/huggingface/load.py +235 -0
  31. lemonade/tools/huggingface/utils.py +359 -0
  32. lemonade/tools/humaneval.py +264 -0
  33. lemonade/tools/llamacpp/bench.py +255 -0
  34. lemonade/tools/llamacpp/load.py +222 -0
  35. lemonade/tools/llamacpp/utils.py +1260 -0
  36. lemonade/tools/management_tools.py +319 -0
  37. lemonade/tools/mmlu.py +319 -0
  38. lemonade/tools/oga/__init__.py +0 -0
  39. lemonade/tools/oga/bench.py +120 -0
  40. lemonade/tools/oga/load.py +804 -0
  41. lemonade/tools/oga/migration.py +403 -0
  42. lemonade/tools/oga/utils.py +462 -0
  43. lemonade/tools/perplexity.py +147 -0
  44. lemonade/tools/prompt.py +263 -0
  45. lemonade/tools/report/__init__.py +0 -0
  46. lemonade/tools/report/llm_report.py +203 -0
  47. lemonade/tools/report/table.py +899 -0
  48. lemonade/tools/server/__init__.py +0 -0
  49. lemonade/tools/server/flm.py +133 -0
  50. lemonade/tools/server/llamacpp.py +320 -0
  51. lemonade/tools/server/serve.py +2123 -0
  52. lemonade/tools/server/static/favicon.ico +0 -0
  53. lemonade/tools/server/static/index.html +279 -0
  54. lemonade/tools/server/static/js/chat.js +1059 -0
  55. lemonade/tools/server/static/js/model-settings.js +183 -0
  56. lemonade/tools/server/static/js/models.js +1395 -0
  57. lemonade/tools/server/static/js/shared.js +556 -0
  58. lemonade/tools/server/static/logs.html +191 -0
  59. lemonade/tools/server/static/styles.css +2654 -0
  60. lemonade/tools/server/static/webapp.html +321 -0
  61. lemonade/tools/server/tool_calls.py +153 -0
  62. lemonade/tools/server/tray.py +664 -0
  63. lemonade/tools/server/utils/macos_tray.py +226 -0
  64. lemonade/tools/server/utils/port.py +77 -0
  65. lemonade/tools/server/utils/thread.py +85 -0
  66. lemonade/tools/server/utils/windows_tray.py +408 -0
  67. lemonade/tools/server/webapp.py +34 -0
  68. lemonade/tools/server/wrapped_server.py +559 -0
  69. lemonade/tools/tool.py +374 -0
  70. lemonade/version.py +1 -0
  71. lemonade_install/__init__.py +1 -0
  72. lemonade_install/install.py +239 -0
  73. lemonade_sdk-9.1.1.dist-info/METADATA +276 -0
  74. lemonade_sdk-9.1.1.dist-info/RECORD +84 -0
  75. lemonade_sdk-9.1.1.dist-info/WHEEL +5 -0
  76. lemonade_sdk-9.1.1.dist-info/entry_points.txt +5 -0
  77. lemonade_sdk-9.1.1.dist-info/licenses/LICENSE +201 -0
  78. lemonade_sdk-9.1.1.dist-info/licenses/NOTICE.md +47 -0
  79. lemonade_sdk-9.1.1.dist-info/top_level.txt +3 -0
  80. lemonade_server/cli.py +805 -0
  81. lemonade_server/model_manager.py +758 -0
  82. lemonade_server/pydantic_models.py +159 -0
  83. lemonade_server/server_models.json +643 -0
  84. lemonade_server/settings.py +39 -0
@@ -0,0 +1,120 @@
1
+ import argparse
2
+ import statistics
3
+ from statistics import StatisticsError
4
+ import psutil
5
+ from lemonade.state import State
6
+ from lemonade.tools.adapter import ModelAdapter, TokenizerAdapter
7
+ from lemonade.tools.bench import Bench
8
+
9
+
10
class OgaBench(Bench):
    """
    Benchmark any model that adheres to the ModelAdapter interface.

    Required input state:
        - MODEL: model instance to benchmark.
        - TOKENIZER: tokenizer instance used to generate inputs for the model.

    Output state produced: None
    """

    unique_name = "oga-bench"

    @staticmethod
    def parser(add_help: bool = True) -> argparse.ArgumentParser:
        """
        Build the command-line parser for this tool.

        Creates the tool's own parser via helpful_parser() and then passes it
        through Bench.parser(), returning whatever Bench.parser() returns.
        """
        oga_parser = __class__.helpful_parser(
            short_description="Benchmark an LLM in onnxruntime-genai (OGA)",
            add_help=add_help,
        )
        return Bench.parser(oga_parser)

    def get_prompt_str(self, state, token_length):
        """
        Build a filler prompt aimed at tokenizing to `token_length` tokens.

        A first guess of "word " repeated (token_length - 1) times is
        tokenized with state.tokenizer. If the measured length differs from
        the target, the repeat count is corrected once by that difference
        (never going below zero). The corrected prompt is not re-tokenized.
        """
        tokenizer: TokenizerAdapter = state.tokenizer
        filler = "word "

        first_guess = filler * (token_length - 1)
        measured_length = len(tokenizer(first_guess, return_tensors="pt").input_ids)
        surplus = measured_length - token_length

        if surplus != 0:
            return filler * max(token_length - 1 - surplus, 0)
        return first_guess

    def run_prompt(
        self,
        state: State,
        report_progress_fn,
        prompt: str,
        iterations: int,
        warmup_iterations: int,
        output_tokens: int,
    ):
        """
        Benchmark one prompt and append the results to this tool's stat lists.

        Args:
            state: provides `model` and `tokenizer`.
            report_progress_fn: called after every generation (warmup and
                measured) with the completed fraction of all generations.
            prompt: text to tokenize and feed to the model.
            iterations: number of measured generations.
            warmup_iterations: number of unmeasured generations run first.
            output_tokens: number of new tokens requested per generation.

        Raises:
            The exception built by Bench.not_enough_tokens(output_tokens) when
            no measured iteration produced at least `output_tokens` tokens.
        """

        def stdev_or_none(samples):
            # statistics.stdev needs at least two samples; report None otherwise
            try:
                return statistics.stdev(samples)
            except StatisticsError:
                return None

        model: ModelAdapter = state.model
        tokenizer: TokenizerAdapter = state.tokenizer

        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        prompt_length = len(input_ids)
        self.input_ids_len_list.append(prompt_length)

        total_runs = warmup_iterations + iterations
        ttft_samples = []
        tps_samples = []

        # Warmup generations: output lengths are recorded, timings are not
        for step in range(1, warmup_iterations + 1):
            model.generate(input_ids, max_new_tokens=output_tokens)
            self.tokens_out_len_list.append(model.response_tokens)
            report_progress_fn(step / total_runs)

        # Measured generations: min_new_tokens is also set to output_tokens
        for step in range(1, iterations + 1):
            model.generate(
                input_ids,
                max_new_tokens=output_tokens,
                min_new_tokens=output_tokens,
            )
            report_progress_fn((warmup_iterations + step) / total_runs)

            generated = model.response_tokens
            self.tokens_out_len_list.append(generated)

            # Timings only count when the full token budget was generated
            if generated >= output_tokens:
                ttft_samples.append(model.time_to_first_token)
                tps_samples.append(model.tokens_per_second)

        if not (ttft_samples and tps_samples):
            raise Bench.not_enough_tokens(output_tokens)

        mean_ttft = statistics.mean(ttft_samples)
        self.mean_time_to_first_token_list.append(mean_ttft)
        # Prefill throughput: prompt tokens divided by mean time to first token
        self.prefill_tokens_per_second_list.append(prompt_length / mean_ttft)
        self.token_generation_tokens_per_second_list.append(
            statistics.mean(tps_samples)
        )

        self.std_dev_time_to_first_token_list.append(stdev_or_none(ttft_samples))
        self.std_dev_token_generation_tokens_per_second_list.append(
            stdev_or_none(tps_samples)
        )

        if self.save_max_memory_used:
            # NOTE(review): psutil documents peak_wset as a Windows-only field;
            # presumably save_max_memory_used is only enabled there - confirm
            # where the flag is set.
            peak_bytes = psutil.Process().memory_info().peak_wset
            self.max_memory_used_gb_list.append(peak_bytes / 1024**3)
117
+
118
+
119
+ # This file was originally licensed under Apache 2.0. It has been modified.
120
+ # Modifications Copyright (c) 2025 AMD