lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
@@ -93,9 +93,11 @@ class LoadLlamaCpp(FirstTool):
         from lemonade.tools.llamacpp.utils import (
             install_llamacpp,
             get_llama_cli_exe_path,
+            get_llama_bench_exe_path,
             get_llama_installed_version,
             parse_checkpoint,
             download_gguf,
+            resolve_local_gguf_model,
             get_local_checkpoint_path,
             LlamaCppTokenizerAdapter,
             LlamaCppAdapter,
@@ -103,6 +105,8 @@ class LoadLlamaCpp(FirstTool):
 
         install_llamacpp(backend)
 
+        extension = ""
+
         # Check if input is a local folder containing a .GGUF model
         if os.path.isdir(input):
             # input is a local folder
@@ -121,6 +125,17 @@ class LoadLlamaCpp(FirstTool):
                 )
             model_to_use = gguf_files[0]
             full_model_path = os.path.join(local_model_folder, model_to_use)
+            extension = ".gguf"
+
+        elif input.endswith(".gguf") and os.path.isfile(input):
+            # input is a local .gguf file
+            full_model_path = os.path.abspath(input)
+            checkpoint = "local_model"
+            state.checkpoint = checkpoint
+            state.save_stat(Keys.CHECKPOINT, checkpoint)
+            state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
+            model_to_use = os.path.basename(full_model_path)
+            extension = ".gguf"
 
         else:
             # Input is a model checkpoint
@@ -155,12 +170,21 @@ class LoadLlamaCpp(FirstTool):
                 )
 
             else:
+                # First, try to resolve from local cache to avoid unnecessary downloads
+                base_checkpoint, variant = parse_checkpoint(checkpoint)
+                snapshot_files = resolve_local_gguf_model(
+                    base_checkpoint, variant, None
+                )
+
+                # If not found locally, download from internet
+                if not snapshot_files:
+                    snapshot_files = download_gguf(checkpoint)
 
-                snapshot_files = download_gguf(checkpoint)
                 full_model_path = snapshot_files["variant"]
                 model_to_use = os.path.basename(full_model_path)
 
         llama_cli_exe_path = get_llama_cli_exe_path(backend)
+        llama_bench_exe_path = get_llama_bench_exe_path(backend)
         printing.log_info(f"Using llama_cli for GGUF model: {llama_cli_exe_path}")
 
         # Get the directory containing the executable for shared libraries
@@ -174,8 +198,10 @@ class LoadLlamaCpp(FirstTool):
             context_size=context_size,
             threads=threads,
             executable=llama_cli_exe_path,
+            bench_executable=llama_bench_exe_path,
             reasoning=reasoning,
             lib_dir=lib_dir,
+            state=state,
         )
         state.tokenizer = LlamaCppTokenizerAdapter()
         state.device = device
@@ -186,7 +212,9 @@ class LoadLlamaCpp(FirstTool):
             Keys.LLAMA_CLI_VERSION_INFO, get_llama_installed_version(backend)
         )
 
-        status.add_to_state(state=state, name=input, model=model_to_use)
+        status.add_to_state(
+            state=state, name=input, model=model_to_use, extension=extension
+        )
         return state
 