PyPI - lemonade-sdk - Versions diffs - 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl - Mend

lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (38)

lemonade/cache.py +6 -1
lemonade/common/status.py +4 -4
lemonade/common/system_info.py +0 -26
lemonade/tools/accuracy.py +143 -48
lemonade/tools/adapter.py +6 -1
lemonade/tools/bench.py +26 -8
lemonade/tools/flm/utils.py +70 -22
lemonade/tools/huggingface/bench.py +6 -1
lemonade/tools/llamacpp/bench.py +146 -27
lemonade/tools/llamacpp/load.py +30 -2
lemonade/tools/llamacpp/utils.py +317 -21
lemonade/tools/oga/bench.py +5 -26
lemonade/tools/oga/load.py +49 -123
lemonade/tools/oga/migration.py +403 -0
lemonade/tools/report/table.py +76 -8
lemonade/tools/server/flm.py +2 -6
lemonade/tools/server/llamacpp.py +43 -2
lemonade/tools/server/serve.py +354 -18
lemonade/tools/server/static/js/chat.js +15 -77
lemonade/tools/server/static/js/model-settings.js +24 -3
lemonade/tools/server/static/js/models.js +440 -37
lemonade/tools/server/static/js/shared.js +61 -8
lemonade/tools/server/static/logs.html +157 -13
lemonade/tools/server/static/styles.css +204 -0
lemonade/tools/server/static/webapp.html +39 -1
lemonade/version.py +1 -1
lemonade_install/install.py +33 -579
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/METADATA +6 -4
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/RECORD +38 -37
lemonade_server/cli.py +10 -0
lemonade_server/model_manager.py +172 -11
lemonade_server/pydantic_models.py +3 -0
lemonade_server/server_models.json +102 -66
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/WHEEL +0 -0
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/entry_points.txt +0 -0
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/LICENSE +0 -0
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/licenses/NOTICE.md +0 -0
{lemonade_sdk-8.1.11.dist-info → lemonade_sdk-8.2.2.dist-info}/top_level.txt +0 -0

lemonade/tools/report/table.py CHANGED Viewed

@@ -48,6 +48,18 @@ def _merge_join(str1, str2) -> str:
     return str1 + ("\n" if str1 and str2 else "") + str2
+def _window_sum(data: list, n_windows: int) -> list:
+    """Sums data into n_windows windows"""
+    if n_windows <= 0:
+        return data
+    window_size = max(1, len(data) // n_windows)
+    summed_data = []
+    for i in range(0, len(data), window_size):
+        window_sum = sum(data[i : i + window_size])
+        summed_data.append(window_sum)
+    return summed_data
 ################################################################################
 # CLASSES THAT DESCRIBE TEXT TABLE COLUMNS
 ################################################################################
@@ -88,10 +100,54 @@ class SimpleStat(TableColumn):
         if lean and self.omit_if_lean:
             return None
         data = build_stats.get(self.stat, None)
-        if data is None:
-            return ""
+        if data is None or (data == []):
+            return "-"
         if self.stat_fn:
             data = self.stat_fn(data)
+        cell_str = "\n".join(
+            [
+                _wrap("-" if x is None else f"{x:{self.format_str}}", self.wrap)
+                for x in _to_list(data)
+            ]
+        )
+        return cell_str
+class DependentStat(TableColumn):
+    """
+    These are for statistics already declared by the tool or basic build stats that
+    rely on one or more additional stats to compute their value.  The dependency is
+    embodied by the stat_fn function.
+    """
+    def __init__(
+        self,
+        column_header,
+        stats,
+        format_str,
+        align="center",
+        omit_if_lean=False,
+        wrap=None,
+        stat_fn=None,
+    ):
+        self.column_header = column_header
+        self.stats = stats
+        self.format_str = format_str
+        self.align = align
+        self.omit_if_lean = omit_if_lean
+        self.wrap = wrap or self.default_wrap
+        self.stat_fn = stat_fn
+    def get_str(self, build_stats, lean=False):
+        if lean and self.omit_if_lean:
+            return None
+        stats_data = [build_stats.get(stat, None) for stat in self.stats]
+        if self.stat_fn:
+            data = self.stat_fn(stats_data)
+        else:
+            data = stats_data[0]
+        if data is None or (data == []):
+            return "-"
         cell_str = "\n".join(
             [_wrap(f"{x:{self.format_str}}", self.wrap) for x in _to_list(data)]
         )
@@ -434,10 +490,12 @@ class Table(ABC):
             row = []
             # First columns
+            first_columns_count = 0
             for entry in first_columns:
                 entry_str = entry.get_str(build_stats, self.lean)
                 if entry_str is not None:
                     row.append(entry_str)
+                    first_columns_count += 1
             # Per tool columns
             for tool in tools:
@@ -460,22 +518,24 @@ class Table(ABC):
                     row.append(entry_str)
             # Final columns
+            last_columns_count = 0
             for entry in last_columns:
                 entry_str = entry.get_str(build_stats, self.lean)
                 if entry_str is not None:
                     row.append(entry_str)
+                    last_columns_count += 1
             # See if this row should be merged with the last row
             if last_build_stats and self.merge_test_fn(last_build_stats, build_stats):
                 # Merge with last row
-                for col in range(0, len(first_columns)):
+                for col in range(0, first_columns_count):
                     # If identical, don't duplicate
                     if last_row[col] != row[col]:
                         last_row[col] = _merge_join(last_row[col], row[col])
-                for col in range(len(first_columns), len(row) - len(last_columns)):
+                for col in range(first_columns_count, len(row) - last_columns_count):
                     # Allow duplicates
                     last_row[col] = _merge_join(last_row[col], row[col])
-                for col in range(len(row) - len(last_columns), len(row)):
+                for col in range(len(row) - last_columns_count, len(row)):
                     # If identical, don't duplicate
                     if last_row[col] != row[col]:
                         last_row[col] = _merge_join(last_row[col], row[col])
@@ -581,17 +641,25 @@ class LemonadePerfTable(Table):
                     Keys.STD_DEV_SECONDS_TO_FIRST_TOKEN,
                     ".2f",
                 ),
+                StatWithSD(
+                    _wrap("Prefill Tokens per Second", 8),
+                    Keys.PREFILL_TOKENS_PER_SECOND,
+                    Keys.STD_DEV_PREFILL_TOKENS_PER_SECOND,
+                    ".2f",
+                ),
                 StatWithSD(
                     _wrap("Tokens per Second", 8),
                     Keys.TOKEN_GENERATION_TOKENS_PER_SECOND,
                     Keys.STD_DEV_TOKENS_PER_SECOND,
                     ".2f",
                 ),
-                SimpleStat(
+                DependentStat(
                     _wrap("Total Generated Tokens", 9),
-                    Keys.RESPONSE_TOKENS,
+                    [Keys.RESPONSE_TOKENS, Keys.PROMPT_TOKENS],
                     "d",
-                    stat_fn=lambda x: sum(_to_list(x)),
+                    stat_fn=lambda x: _window_sum(
+                        _to_list(x[0]), n_windows=len(_to_list(x[1]))
+                    ),
                 ),
                 SimpleStat(
                     _wrap("Memory Used (GB)", 8), Keys.MAX_MEMORY_USED_GBYTE, ".3f"

lemonade/tools/server/flm.py CHANGED Viewed

@@ -42,12 +42,6 @@ class FlmServer(WrappedServer):
         self.flm_model_name = None
         super().__init__(server_name="flm-server", telemetry=FlmTelemetry())
-    def _choose_port(self):
-        """
-        `flm serve` doesn't support port selection as of v0.9.10
-        """
-        self.port = 11434
     def address(self):
         return f"http://localhost:{self.port}/v1"
@@ -83,6 +77,8 @@ class FlmServer(WrappedServer):
             f"{self.flm_model_name}",
             "--ctx-len",
             str(ctx_size),
+            "--port",
+            str(self.port),
         ]
         # Set up environment with library path for Linux

lemonade/tools/server/llamacpp.py CHANGED Viewed

@@ -15,9 +15,16 @@ from lemonade.tools.llamacpp.utils import (
     get_llama_server_exe_path,
     install_llamacpp,
     download_gguf,
+    resolve_local_gguf_model,
+    parse_checkpoint,
 )
 from lemonade.tools.server.wrapped_server import WrappedServerTelemetry, WrappedServer
+# Embedding model batch configuration set to 8192 as default
+EMBEDDING_CTX_SIZE = 8192
+EMBEDDING_BATCH_SIZE = 8192
+EMBEDDING_UBATCH_SIZE = 8192
 class LlamaTelemetry(WrappedServerTelemetry):
     """
@@ -101,8 +108,25 @@ class LlamaServer(WrappedServer):
         self, config_checkpoint, config_mmproj=None, do_not_upgrade=False
     ) -> dict:
         """
-        Download a model for the wrapper server
+        Download a model for the wrapper server.
+        First checks local cache, then downloads from internet if needed.
         """
+        # If it's a direct file path, just return it
+        if os.path.exists(config_checkpoint):
+            result = {"variant": config_checkpoint}
+            if config_mmproj:
+                result["mmproj"] = config_mmproj
+            return result
+        # Try to resolve from local cache first to avoid unnecessary downloads
+        checkpoint, variant = parse_checkpoint(config_checkpoint)
+        local_result = resolve_local_gguf_model(checkpoint, variant, config_mmproj)
+        if local_result:
+            return local_result
+        # Not found locally - download from internet
         return download_gguf(
             config_checkpoint=config_checkpoint,
             config_mmproj=config_mmproj,
@@ -135,6 +159,12 @@ class LlamaServer(WrappedServer):
         # Get the current executable path (handles both Windows and Ubuntu structures)
         exe_path = get_llama_server_exe_path(self.backend)
+        # For embedding models, use a larger context size to support longer individual
+        # strings. Embedding requests can include multiple strings in a batch, and each
+        # string needs to fit within the context window.
+        if supports_embeddings and ctx_size < EMBEDDING_CTX_SIZE:
+            ctx_size = EMBEDDING_CTX_SIZE
         # Build the base command
         base_command = [
             exe_path,
@@ -180,7 +210,18 @@ class LlamaServer(WrappedServer):
         # Add embeddings support if the model supports it
         if supports_embeddings:
-            base_command.append("--embeddings")
+            # For embedding models, set batch sizes to handle multiple documents in a single request
+            # batch-size: logical batch size (total tokens across all sequences)
+            # ubatch-size: physical batch size (tokens processed in a single forward pass)
+            base_command.extend(
+                [
+                    "--embeddings",
+                    "--batch-size",
+                    str(EMBEDDING_BATCH_SIZE),
+                    "--ubatch-size",
+                    str(EMBEDDING_UBATCH_SIZE),
+                ]
+            )
         # Add reranking support if the model supports it
         if supports_reranking:

lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl

Potentially problematic release.

lemonade-sdk 8.1.11__py3-none-any.whl → 8.2.2__py3-none-any.whl