lemonade-sdk 8.1.4__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (53)
  1. lemonade/cache.py +6 -1
  2. lemonade/cli.py +47 -5
  3. lemonade/common/inference_engines.py +13 -4
  4. lemonade/common/status.py +4 -4
  5. lemonade/common/system_info.py +544 -1
  6. lemonade/profilers/agt_power.py +437 -0
  7. lemonade/profilers/hwinfo_power.py +429 -0
  8. lemonade/tools/accuracy.py +143 -48
  9. lemonade/tools/adapter.py +6 -1
  10. lemonade/tools/bench.py +26 -8
  11. lemonade/tools/flm/__init__.py +1 -0
  12. lemonade/tools/flm/utils.py +303 -0
  13. lemonade/tools/huggingface/bench.py +6 -1
  14. lemonade/tools/llamacpp/bench.py +146 -27
  15. lemonade/tools/llamacpp/load.py +30 -2
  16. lemonade/tools/llamacpp/utils.py +393 -33
  17. lemonade/tools/oga/bench.py +5 -26
  18. lemonade/tools/oga/load.py +60 -121
  19. lemonade/tools/oga/migration.py +403 -0
  20. lemonade/tools/report/table.py +76 -8
  21. lemonade/tools/server/flm.py +133 -0
  22. lemonade/tools/server/llamacpp.py +220 -553
  23. lemonade/tools/server/serve.py +684 -168
  24. lemonade/tools/server/static/js/chat.js +666 -342
  25. lemonade/tools/server/static/js/model-settings.js +24 -3
  26. lemonade/tools/server/static/js/models.js +597 -73
  27. lemonade/tools/server/static/js/shared.js +79 -14
  28. lemonade/tools/server/static/logs.html +191 -0
  29. lemonade/tools/server/static/styles.css +491 -66
  30. lemonade/tools/server/static/webapp.html +83 -31
  31. lemonade/tools/server/tray.py +158 -38
  32. lemonade/tools/server/utils/macos_tray.py +226 -0
  33. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  34. lemonade/tools/server/webapp.py +4 -1
  35. lemonade/tools/server/wrapped_server.py +559 -0
  36. lemonade/version.py +1 -1
  37. lemonade_install/install.py +54 -611
  38. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +29 -72
  39. lemonade_sdk-8.2.2.dist-info/RECORD +83 -0
  40. lemonade_server/cli.py +145 -37
  41. lemonade_server/model_manager.py +521 -37
  42. lemonade_server/pydantic_models.py +28 -1
  43. lemonade_server/server_models.json +246 -92
  44. lemonade_server/settings.py +39 -39
  45. lemonade/tools/quark/__init__.py +0 -0
  46. lemonade/tools/quark/quark_load.py +0 -173
  47. lemonade/tools/quark/quark_quantize.py +0 -439
  48. lemonade_sdk-8.1.4.dist-info/RECORD +0 -77
  49. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
  50. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
  51. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
  52. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
  53. {lemonade_sdk-8.1.4.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0
@@ -93,9 +93,11 @@ class LoadLlamaCpp(FirstTool):
         from lemonade.tools.llamacpp.utils import (
             install_llamacpp,
             get_llama_cli_exe_path,
+            get_llama_bench_exe_path,
             get_llama_installed_version,
             parse_checkpoint,
             download_gguf,
+            resolve_local_gguf_model,
             get_local_checkpoint_path,
             LlamaCppTokenizerAdapter,
             LlamaCppAdapter,
@@ -103,6 +105,8 @@ class LoadLlamaCpp(FirstTool):
 
         install_llamacpp(backend)
 
+        extension = ""
+
         # Check if input is a local folder containing a .GGUF model
         if os.path.isdir(input):
             # input is a local folder
@@ -121,6 +125,17 @@ class LoadLlamaCpp(FirstTool):
                 )
             model_to_use = gguf_files[0]
             full_model_path = os.path.join(local_model_folder, model_to_use)
+            extension = ".gguf"
+
+        elif input.endswith(".gguf") and os.path.isfile(input):
+            # input is a local .gguf file
+            full_model_path = os.path.abspath(input)
+            checkpoint = "local_model"
+            state.checkpoint = checkpoint
+            state.save_stat(Keys.CHECKPOINT, checkpoint)
+            state.save_stat(Keys.LOCAL_MODEL_FOLDER, full_model_path)
+            model_to_use = os.path.basename(full_model_path)
+            extension = ".gguf"
 
         else:
             # Input is a model checkpoint
@@ -155,12 +170,21 @@ class LoadLlamaCpp(FirstTool):
                 )
 
             else:
+                # First, try to resolve from local cache to avoid unnecessary downloads
+                base_checkpoint, variant = parse_checkpoint(checkpoint)
+                snapshot_files = resolve_local_gguf_model(
+                    base_checkpoint, variant, None
+                )
+
+                # If not found locally, download from internet
+                if not snapshot_files:
+                    snapshot_files = download_gguf(checkpoint)
 
-                snapshot_files = download_gguf(checkpoint)
                 full_model_path = snapshot_files["variant"]
                 model_to_use = os.path.basename(full_model_path)
 
         llama_cli_exe_path = get_llama_cli_exe_path(backend)
+        llama_bench_exe_path = get_llama_bench_exe_path(backend)
         printing.log_info(f"Using llama_cli for GGUF model: {llama_cli_exe_path}")
 
         # Get the directory containing the executable for shared libraries
@@ -174,8 +198,10 @@ class LoadLlamaCpp(FirstTool):
             context_size=context_size,
             threads=threads,
             executable=llama_cli_exe_path,
+            bench_executable=llama_bench_exe_path,
             reasoning=reasoning,
             lib_dir=lib_dir,
+            state=state,
         )
         state.tokenizer = LlamaCppTokenizerAdapter()
         state.device = device
@@ -186,7 +212,9 @@ class LoadLlamaCpp(FirstTool):
             Keys.LLAMA_CLI_VERSION_INFO, get_llama_installed_version(backend)
         )
 
-        status.add_to_state(state=state, name=input, model=model_to_use)
+        status.add_to_state(
+            state=state, name=input, model=model_to_use, extension=extension
+        )
         return state
 