lemonade-sdk 8.0.2__py3-none-any.whl → 8.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic; see the associated advisory on the package registry page for more details.

@@ -1,10 +1,10 @@
1
1
  lemonade/__init__.py,sha256=W1Qk7r0rnQqFhPNHp6BIBT_q-OH3s-8Q_POoVfAmKW0,117
2
2
  lemonade/api.py,sha256=X7DxBgsOl5L_z6uTkwoJWf8x0rjXWS2JoeEqmo9bMfc,3873
3
3
  lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
- lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
4
+ lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=hTIZ_8cc-ggqcFeOYQQKOHudFQCQNQlM4ZltuYIIjD4,22
7
+ lemonade/version.py,sha256=8H4GfArMIlRTCgSsTERRXsD3PA6Y67z17oTQOJnuUME,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
@@ -17,16 +17,16 @@ lemonade/common/system_info.py,sha256=qOwteG_mBo-ImilbiK7Gq37sWIE9ugF0dbWcj9zLD4
17
17
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
18
18
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
19
19
  lemonade/profilers/memory_tracker.py,sha256=1iuKt0FmNVYLDnOc-oZM8dX9TUksvoxO0m2EoYWjhYQ,9367
20
- lemonade/profilers/profiler.py,sha256=y_iMGr1ToQ6rcwcIcXck4ajapisLXCfHggiV-IpPF98,1666
20
+ lemonade/profilers/profiler.py,sha256=Y5FSbc386bMlTVbqCuya9pYrso5aTthxahR1V_ZKQ9E,1902
21
21
  lemonade/tools/__init__.py,sha256=_6xRc-FHxmujoLjLjWtpYrWYEXtCSneSy-5ya01kyPk,53
22
22
  lemonade/tools/accuracy.py,sha256=9HCmczDngkBUuUrt49d2CkRo4J0qyWoFYs5cj20bGkg,11714
23
23
  lemonade/tools/adapter.py,sha256=HG54iMd6HDPZ4vnQIl7codq3HzffWbcHSIs_jVbNbhU,2958
24
24
  lemonade/tools/bench.py,sha256=aN5LMA_EH6-ZhAH3Gf26JYL7s0eKpUd3j-bReRhzvEY,10016
25
- lemonade/tools/humaneval.py,sha256=9lzsOaCSECf8LzqkQLFNwy1doAiZtK5gRN-RbZH7GLI,9532
25
+ lemonade/tools/humaneval.py,sha256=JbxuoOzvR4iyxZv4R6MI7a3gUt5ef_Jj6Ie-9VP2wzY,9531
26
26
  lemonade/tools/management_tools.py,sha256=RO-lU-hjZhrP9KD9qcLI7MrLu-Rxnkrxzn45qqwKInE,8554
27
- lemonade/tools/mmlu.py,sha256=aEp9nMKTX5yaSaVZ15YmXbWE0YugjeAacnqjMZ13hHM,11072
28
- lemonade/tools/perplexity.py,sha256=xHl4cTBpJOCNcVxXhMv6eMp8fgUQmFM0G8DeRnx_rUk,5631
29
- lemonade/tools/prompt.py,sha256=AT3p5rCGHEs9ozeGxwWl07iKF-mgLxFOkYLjU2btFHs,8638
27
+ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
28
+ lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
29
+ lemonade/tools/prompt.py,sha256=cy6McZeLgk26xG1dJEY-cYnY2x8FUdyOOSG86WfBKCg,9348
30
30
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
31
31
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
32
32
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
@@ -35,36 +35,36 @@ lemonade/tools/llamacpp/bench.py,sha256=A1X8ULQMxPVsff-AdiUsbWQUKpx7U7nFRNHFJRPd
35
35
  lemonade/tools/llamacpp/load.py,sha256=o3vVlefdxmdkHnuvFR3TOxiJkpNAuNFcs9Whfp24jpg,9236
36
36
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  lemonade/tools/oga/bench.py,sha256=T3c40NevM3NA7CT98B6vBj1nXfdITDqpfMHYSjhjwpA,5061
38
- lemonade/tools/oga/load.py,sha256=7Sdf6PFPrqbadPabyJb_uPRUIP09qj21ZYdXz47MqsE,28570
38
+ lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
39
39
  lemonade/tools/oga/utils.py,sha256=p7faMNfT-rLURC9t_s1S_STQRzzLADqbngUliTOOXeQ,16144
40
40
  lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
- lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2YVz6U,5581
42
- lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
41
+ lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
42
+ lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
43
43
  lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
45
- lemonade/tools/report/table.py,sha256=VkTv5Vd0HOXudEthCBnFMrWK73Dm2AQP2_B83vEKBzI,25129
45
+ lemonade/tools/report/table.py,sha256=wJFzKtlmGQH0RQ5O9nevtpMe_-zQ-8zNOndINQuzsjM,27793
46
46
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
- lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
48
- lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
47
+ lemonade/tools/server/llamacpp.py,sha256=e1MYKSJBu-jlOE5GQSBsC9CUPAeqw5wXXxoxBKA5zb8,20038
48
+ lemonade/tools/server/serve.py,sha256=ORffC4bcBJ-L5-JbmZX91X3yHt1JWxZcIjrZuu9x8TQ,56165
49
49
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
50
- lemonade/tools/server/tray.py,sha256=OI2uCncs8UgnYFLCKHHXq06RETO2RFEcn4xLzMq-q_c,16675
50
+ lemonade/tools/server/tray.py,sha256=4Kf3x8YfRaItPW7lxlEwerD7c5Q2snzcNk3ZrEoae58,17259
51
51
  lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
52
52
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
53
- lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
54
- lemonade/tools/server/static/webapp.html,sha256=im7YQkwvbuqrbO-sLhStVqtA6B7HKAn2azZka1KoeJQ,21260
53
+ lemonade/tools/server/static/styles.css,sha256=x-pf7xts0te9JWAafcNFqzE7r1fl6n_H362Eiz49ixI,24722
54
+ lemonade/tools/server/static/webapp.html,sha256=AS61ZBDnZkIUpT-iZFlTnWpkp6Yeozs4obzauX4crlU,35004
55
55
  lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
56
56
  lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
57
57
  lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
58
58
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
59
59
  lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
60
- lemonade_sdk-8.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
- lemonade_sdk-8.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
60
+ lemonade_sdk-8.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
61
+ lemonade_sdk-8.0.4.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
62
62
  lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
63
- lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
64
- lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
65
- lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
66
- lemonade_sdk-8.0.2.dist-info/METADATA,sha256=hS5Xn5Pjq0RbdLlhedz3HQMCvkRrMWFoAI0Mao4cHwg,8225
67
- lemonade_sdk-8.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
- lemonade_sdk-8.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
- lemonade_sdk-8.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
- lemonade_sdk-8.0.2.dist-info/RECORD,,
63
+ lemonade_server/model_manager.py,sha256=0HqLR38uOu_hxRWVYQ_P6YmwaR-jkDuaAqGYo60X8C0,16702
64
+ lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
65
+ lemonade_server/server_models.json,sha256=Y-j9KAvHmfv77welC0rfRao4inLBce6AVySb-oy_uNE,7519
66
+ lemonade_sdk-8.0.4.dist-info/METADATA,sha256=FqA9Jtgx1QE1EjLg_lxcfcAMI3j0cKpZxoe4GnaGLRA,7754
67
+ lemonade_sdk-8.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ lemonade_sdk-8.0.4.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
69
+ lemonade_sdk-8.0.4.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
70
+ lemonade_sdk-8.0.4.dist-info/RECORD,,
@@ -54,6 +54,17 @@ class ModelManager:
54
54
  for model_name, model_info in user_models.items()
55
55
  }
56
56
 
57
+ # Backwards compatibility for user models that were created before version 8.0.4
58
+ # "reasoning" was a boolean, but as of 8.0.4 it became a label
59
+ for _, model_info in user_models.items():
60
+ if "reasoning" in model_info:
61
+ model_info["labels"] = (
62
+ ["reasoning"]
63
+ if not model_info["labels"]
64
+ else model_info["labels"] + ["reasoning"]
65
+ )
66
+ del model_info["reasoning"]
67
+
57
68
  models.update(user_models)
58
69
 
59
70
  # Add the model name as a key in each entry, to make it easier
@@ -102,57 +113,131 @@ class ModelManager:
102
113
  """
103
114
  return self.filter_models_by_backend(self.downloaded_models)
104
115
 
116
+ def identify_gguf_models(
117
+ self, checkpoint: str, variant: str, mmproj: str
118
+ ) -> tuple[dict, list[str]]:
119
+ """
120
+ Identifies the GGUF model files in the repository that match the variant.
121
+ """
122
+
123
+ hint = """
124
+ The CHECKPOINT:VARIANT scheme is used to specify model files in Hugging Face repositories.
125
+
126
+ The VARIANT format can be one of several types:
127
+ 1. Full filename: exact file to download
128
+ 2. None/empty: gets the first .gguf file in the repository (excludes mmproj files)
129
+ 3. Quantization variant: find a single file ending with the variant name (case insensitive)
130
+ 4. Folder name: downloads all .gguf files in the folder that matches the variant name (case insensitive)
131
+
132
+ Examples:
133
+ - "unsloth/Qwen3-8B-GGUF:qwen3.gguf" -> downloads "qwen3.gguf"
134
+ - "unsloth/Qwen3-30B-A3B-GGUF" -> downloads "Qwen3-30B-A3B-GGUF.gguf"
135
+ - "unsloth/Qwen3-8B-GGUF:Q4_1" -> downloads "Qwen3-8B-GGUF-Q4_1.gguf"
136
+ - "unsloth/Qwen3-30B-A3B-GGUF:Q4_0" -> downloads all files in "Q4_0/" folder
137
+ """
138
+
139
+ repo_files = huggingface_hub.list_repo_files(checkpoint)
140
+ sharded_files = []
141
+
142
+ # (case 1) If variant ends in .gguf, use it directly
143
+ if variant and variant.endswith(".gguf"):
144
+ variant_name = variant
145
+ if variant_name not in repo_files:
146
+ raise ValueError(
147
+ f"File {variant} not found in Hugging Face repository {checkpoint}. {hint}"
148
+ )
149
+ # (case 2) If no variant is provided, get the first .gguf file in the repository
150
+ elif variant is None:
151
+ all_variants = [
152
+ f for f in repo_files if f.endswith(".gguf") and "mmproj" not in f
153
+ ]
154
+ if len(all_variants) == 0:
155
+ raise ValueError(
156
+ f"No .gguf files found in Hugging Face repository {checkpoint}. {hint}"
157
+ )
158
+ variant_name = all_variants[0]
159
+ else:
160
+ # (case 3) Find a single file ending with the variant name (case insensitive)
161
+ end_with_variant = [
162
+ f
163
+ for f in repo_files
164
+ if f.lower().endswith(f"{variant}.gguf".lower())
165
+ and "mmproj" not in f.lower()
166
+ ]
167
+ if len(end_with_variant) == 1:
168
+ variant_name = end_with_variant[0]
169
+ elif len(end_with_variant) > 1:
170
+ raise ValueError(
171
+ f"Multiple .gguf files found for variant {variant}, but only one is allowed. {hint}"
172
+ )
173
+ # (case 4) Check whether the variant corresponds to a folder with sharded files (case insensitive)
174
+ else:
175
+ sharded_files = [
176
+ f
177
+ for f in repo_files
178
+ if f.endswith(".gguf")
179
+ and f.lower().startswith(f"{variant}/".lower())
180
+ ]
181
+
182
+ if not sharded_files:
183
+ raise ValueError(
184
+ f"No .gguf files found for variant {variant}. {hint}"
185
+ )
186
+
187
+ # Sort to ensure consistent ordering
188
+ sharded_files.sort()
189
+
190
+ # Use first file as primary (this is how llamacpp handles it)
191
+ variant_name = sharded_files[0]
192
+
193
+ core_files = {"variant": variant_name}
194
+
195
+ # If there is a mmproj file, add it to the patterns
196
+ if mmproj:
197
+ if mmproj not in repo_files:
198
+ raise ValueError(
199
+ f"The provided mmproj file {mmproj} was not found in {checkpoint}."
200
+ )
201
+ core_files["mmproj"] = mmproj
202
+
203
+ return core_files, sharded_files
204
+
105
205
  def download_gguf(self, model_config: PullConfig) -> dict:
106
206
  """
107
207
  Downloads the GGUF file for the given model configuration.
208
+
209
+ For sharded models, if the variant points to a folder (e.g. Q4_0), all files in that folder
210
+ will be downloaded but only the first file will be returned for loading.
108
211
  """
109
212
 
110
- # The variant parameter can be either:
111
- # 1. A full GGUF filename (e.g. "model-Q4_0.gguf")
112
- # 2. A quantization variant (e.g. "Q4_0")
113
- # This code handles both cases by constructing the appropriate filename
213
+ # This code handles all cases by constructing the appropriate filename or pattern
114
214
  checkpoint, variant = self.parse_checkpoint(model_config.checkpoint)
115
- hf_base_name = checkpoint.split("/")[-1].replace("-GGUF", "")
116
- variant_name = (
117
- variant if variant.endswith(".gguf") else f"{hf_base_name}-{variant}.gguf"
118
- )
119
215
 
120
- # If there is a mmproj file, add it to the patterns
121
- expected_files = {"variant": variant_name}
122
- if model_config.mmproj:
123
- expected_files["mmproj"] = model_config.mmproj
216
+ # Identify the GGUF model files in the repository that match the variant
217
+ core_files, sharded_files = self.identify_gguf_models(
218
+ checkpoint, variant, model_config.mmproj
219
+ )
124
220
 
125
221
  # Download the files
126
222
  snapshot_folder = huggingface_hub.snapshot_download(
127
223
  repo_id=checkpoint,
128
- allow_patterns=list(expected_files.values()),
224
+ allow_patterns=list(core_files.values()) + sharded_files,
129
225
  )
130
226
 
131
- # Make sure we downloaded something
132
- # If we didn't that can indicate that no patterns from allow_patterns match
133
- # any files in the HF repo
134
- if not os.path.exists(snapshot_folder):
135
- raise ValueError(
136
- "No patterns matched the variant parameter (CHECKPOINT:VARIANT). "
137
- "Try again, providing the full filename of your target .gguf file as the variant."
138
- " For example: Qwen/Qwen2.5-Coder-3B-Instruct-GGUF:"
139
- "qwen2.5-coder-3b-instruct-q4_0.gguf"
140
- )
141
-
142
- # Ensure we downloaded all expected files while creating a dict of the downloaded files
143
- snapshot_files = {}
144
- for file in expected_files:
145
- snapshot_files[file] = os.path.join(snapshot_folder, expected_files[file])
146
- if expected_files[file].lower() not in [
147
- name.lower() for name in os.listdir(snapshot_folder)
148
- ]:
227
+ # Ensure we downloaded all expected files
228
+ for file in list(core_files.values()) + sharded_files:
229
+ expected_path = os.path.join(snapshot_folder, file)
230
+ if not os.path.exists(expected_path):
149
231
  raise ValueError(
150
232
  f"Hugging Face snapshot download for {model_config.checkpoint} "
151
- f"expected file {expected_files[file]} not found in {snapshot_folder}"
233
+ f"expected file {file} not found at {expected_path}"
152
234
  )
153
235
 
154
- # Return a dict that points to the snapshot path of the downloaded GGUF files
155
- return snapshot_files
236
+ # Return a dict of the full path of the core GGUF files
237
+ return {
238
+ file_name: os.path.join(snapshot_folder, file_path)
239
+ for file_name, file_path in core_files.items()
240
+ }
156
241
 
157
242
  def download_models(
158
243
  self,
@@ -194,9 +279,8 @@ class ModelManager:
194
279
  new_user_model = {
195
280
  "checkpoint": checkpoint,
196
281
  "recipe": recipe,
197
- "reasoning": reasoning,
198
282
  "suggested": True,
199
- "labels": ["custom"],
283
+ "labels": ["custom"] + (["reasoning"] if reasoning else []),
200
284
  }
201
285
 
202
286
  if mmproj:
@@ -249,6 +333,9 @@ class ModelManager:
249
333
 
250
334
  user_models[model_name] = new_user_model
251
335
 
336
+ # Ensure the cache directory exists before writing the file
337
+ os.makedirs(os.path.dirname(USER_MODELS_FILE), exist_ok=True)
338
+
252
339
  with open(USER_MODELS_FILE, mode="w", encoding="utf-8") as file:
253
340
  json.dump(user_models, fp=file)
254
341
 
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, Union, List, Any
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -65,6 +65,30 @@ class ChatCompletionRequest(BaseModel):
65
65
  response_format: dict | None = None
66
66
 
67
67
 
68
+ class EmbeddingsRequest(BaseModel):
69
+ """
70
+ Request model for embeddings API endpoint.
71
+
72
+ Generates embeddings for the provided input text or tokens.
73
+ """
74
+
75
+ input: Union[str, List]
76
+ model: Optional[str] = None
77
+ encoding_format: Optional[str] = "float" # "float" or "base64"
78
+
79
+
80
+ class RerankingRequest(BaseModel):
81
+ """
82
+ Request model for reranking API endpoint.
83
+
84
+ Reranks a list of documents based on their relevance to a query.
85
+ """
86
+
87
+ query: str
88
+ documents: List[str]
89
+ model: str
90
+
91
+
68
92
  class ResponsesRequest(BaseModel):
69
93
  """
70
94
  Request model for responses API endpoint.
@@ -2,197 +2,177 @@
2
2
  "Qwen2.5-0.5B-Instruct-CPU": {
3
3
  "checkpoint": "amd/Qwen2.5-0.5B-Instruct-quantized_int4-float16-cpu-onnx",
4
4
  "recipe": "oga-cpu",
5
- "reasoning": false,
6
5
  "suggested": true
7
6
  },
8
7
  "Llama-3.2-1B-Instruct-CPU": {
9
8
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-uint4-float16-cpu-onnx",
10
9
  "recipe": "oga-cpu",
11
- "reasoning": false,
12
10
  "suggested": false
13
11
  },
14
12
  "Llama-3.2-3B-Instruct-CPU": {
15
13
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-uint4-float16-cpu-onnx",
16
14
  "recipe": "oga-cpu",
17
- "reasoning": false,
18
15
  "suggested": false
19
16
  },
20
17
  "Phi-3-Mini-Instruct-CPU": {
21
18
  "checkpoint": "amd/Phi-3-mini-4k-instruct_int4_float16_onnx_cpu",
22
19
  "recipe": "oga-cpu",
23
- "reasoning": false,
24
20
  "suggested": true
25
21
  },
26
22
  "Qwen-1.5-7B-Chat-CPU": {
27
23
  "checkpoint": "amd/Qwen1.5-7B-Chat_uint4_asym_g128_float16_onnx_cpu",
28
24
  "recipe": "oga-cpu",
29
- "reasoning": false,
30
25
  "suggested": true
31
26
  },
32
27
  "DeepSeek-R1-Distill-Llama-8B-CPU": {
33
28
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
34
29
  "recipe": "oga-cpu",
35
- "reasoning": true,
36
- "suggested": true
30
+ "suggested": true,
31
+ "labels": ["reasoning"]
37
32
  },
38
33
  "DeepSeek-R1-Distill-Qwen-7B-CPU": {
39
34
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-cpu",
40
35
  "recipe": "oga-cpu",
41
- "reasoning": true,
42
- "suggested": true
36
+ "suggested": true,
37
+ "labels": ["reasoning"]
43
38
  },
44
39
  "Llama-3.2-1B-Instruct-Hybrid": {
45
40
  "checkpoint": "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
46
41
  "recipe": "oga-hybrid",
47
- "reasoning": false,
48
42
  "max_prompt_length": 3000,
49
43
  "suggested": true
50
44
  },
51
45
  "Llama-3.2-3B-Instruct-Hybrid": {
52
46
  "checkpoint": "amd/Llama-3.2-3B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
53
47
  "recipe": "oga-hybrid",
54
- "reasoning": false,
55
48
  "max_prompt_length": 2000,
56
49
  "suggested": true
57
50
  },
58
51
  "Phi-3-Mini-Instruct-Hybrid": {
59
52
  "checkpoint": "amd/Phi-3-mini-4k-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
60
53
  "recipe": "oga-hybrid",
61
- "reasoning": false,
62
54
  "max_prompt_length": 2000,
63
55
  "suggested": true
64
56
  },
65
57
  "Phi-3.5-Mini-Instruct-Hybrid": {
66
58
  "checkpoint": "amd/Phi-3.5-mini-instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
67
59
  "recipe": "oga-hybrid",
68
- "reasoning": false,
69
60
  "suggested": false
70
61
  },
71
62
  "Qwen-1.5-7B-Chat-Hybrid": {
72
63
  "checkpoint": "amd/Qwen1.5-7B-Chat-awq-g128-int4-asym-fp16-onnx-hybrid",
73
64
  "recipe": "oga-hybrid",
74
- "reasoning": false,
75
65
  "max_prompt_length": 3000,
76
66
  "suggested": true
77
67
  },
78
68
  "DeepSeek-R1-Distill-Llama-8B-Hybrid": {
79
69
  "checkpoint": "amd/DeepSeek-R1-Distill-Llama-8B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
80
70
  "recipe": "oga-hybrid",
81
- "reasoning": true,
82
71
  "max_prompt_length": 2000,
83
- "suggested": true
72
+ "suggested": true,
73
+ "labels": ["reasoning"]
84
74
  },
85
75
  "DeepSeek-R1-Distill-Qwen-7B-Hybrid": {
86
76
  "checkpoint": "amd/DeepSeek-R1-Distill-Qwen-7B-awq-asym-uint4-g128-lmhead-onnx-hybrid",
87
77
  "recipe": "oga-hybrid",
88
- "reasoning": true,
89
78
  "max_prompt_length": 2000,
90
- "suggested": true
79
+ "suggested": true,
80
+ "labels": ["reasoning"]
91
81
  },
92
82
  "Mistral-7B-v0.3-Instruct-Hybrid": {
93
83
  "checkpoint": "amd/Mistral-7B-Instruct-v0.3-awq-g128-int4-asym-fp16-onnx-hybrid",
94
84
  "recipe": "oga-hybrid",
95
- "reasoning": false,
96
85
  "max_prompt_length": 2000,
97
86
  "suggested": true
98
87
  },
99
88
  "Llama-3.1-8B-Instruct-Hybrid": {
100
89
  "checkpoint": "amd/Llama-3.1-8B-Instruct-awq-asym-uint4-g128-lmhead-onnx-hybrid",
101
90
  "recipe": "oga-hybrid",
102
- "reasoning": false,
103
91
  "max_prompt_length": 2000,
104
92
  "suggested": true
105
93
  },
106
94
  "Llama-xLAM-2-8b-fc-r-Hybrid": {
107
95
  "checkpoint": "amd/Llama-xLAM-2-8b-fc-r-awq-g128-int4-asym-bfp16-onnx-hybrid",
108
96
  "recipe": "oga-hybrid",
109
- "reasoning": false,
110
97
  "max_prompt_length": 2000,
111
98
  "suggested": true
112
99
  },
113
100
  "Llama-3.2-1B-Instruct-DirectML": {
114
101
  "checkpoint": "amd/Llama-3.2-1B-Instruct-dml-int4-awq-block-128-directml",
115
102
  "recipe": "oga-igpu",
116
- "reasoning": false,
117
103
  "suggested": false
118
104
  },
119
105
  "Llama-3.2-3B-Instruct-DirectML": {
120
106
  "checkpoint": "amd/Llama-3.2-3B-Instruct-dml-int4-awq-block-128-directml",
121
107
  "recipe": "oga-igpu",
122
- "reasoning": false,
123
108
  "suggested": false
124
109
  },
125
110
  "Phi-3.5-Mini-Instruct-DirectML": {
126
111
  "checkpoint": "amd/phi3.5-mini-instruct-int4-awq-block-128-directml",
127
112
  "recipe": "oga-igpu",
128
- "reasoning": false,
129
113
  "suggested": false
130
114
  },
131
115
  "Qwen-1.5-7B-Chat-DirectML": {
132
116
  "checkpoint": "amd/Qwen1.5-7B-Chat-dml-int4-awq-block-128-directml",
133
117
  "recipe": "oga-igpu",
134
- "reasoning": false,
135
118
  "suggested": false
136
119
  },
137
120
  "Mistral-7B-v0.1-Instruct-DirectML": {
138
121
  "checkpoint": "amd/Mistral-7B-Instruct-v0.1-awq-g128-int4-onnx-directml",
139
122
  "recipe": "oga-igpu",
140
- "reasoning": false,
141
123
  "suggested": false
142
124
  },
143
125
  "Llama-3-8B-Instruct-DirectML": {
144
126
  "checkpoint": "amd/llama3-8b-instruct-awq-g128-int4-onnx-directml",
145
127
  "recipe": "oga-igpu",
146
- "reasoning": false,
147
128
  "suggested": false
148
129
  },
149
130
  "Qwen3-0.6B-GGUF": {
150
131
  "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
151
132
  "recipe": "llamacpp",
152
- "reasoning": true,
153
- "suggested": true
133
+ "suggested": true,
134
+ "labels": ["reasoning"]
154
135
  },
155
136
  "Qwen3-1.7B-GGUF": {
156
137
  "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
157
138
  "recipe": "llamacpp",
158
- "reasoning": true,
159
- "suggested": true
139
+ "suggested": true,
140
+ "labels": ["reasoning"]
160
141
  },
161
142
  "Qwen3-4B-GGUF": {
162
143
  "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
163
144
  "recipe": "llamacpp",
164
- "reasoning": true,
165
- "suggested": true
145
+ "suggested": true,
146
+ "labels": ["reasoning"]
166
147
  },
167
148
  "Qwen3-8B-GGUF": {
168
149
  "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_1",
169
150
  "recipe": "llamacpp",
170
- "reasoning": true,
171
- "suggested": true
151
+ "suggested": true,
152
+ "labels": ["reasoning"]
172
153
  },
173
154
  "DeepSeek-Qwen3-8B-GGUF": {
174
155
  "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
175
156
  "recipe": "llamacpp",
176
- "reasoning": true,
177
- "suggested": true
157
+ "suggested": true,
158
+ "labels": ["reasoning"]
178
159
  },
179
160
  "Qwen3-14B-GGUF": {
180
161
  "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
181
162
  "recipe": "llamacpp",
182
- "reasoning": true,
183
- "suggested": true
163
+ "suggested": true,
164
+ "labels": ["reasoning"]
184
165
  },
185
166
  "Qwen3-30B-A3B-GGUF": {
186
167
  "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
187
168
  "recipe": "llamacpp",
188
- "reasoning": true,
189
- "suggested": true
169
+ "suggested": true,
170
+ "labels": ["reasoning"]
190
171
  },
191
172
  "Gemma-3-4b-it-GGUF": {
192
173
  "checkpoint": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
193
174
  "mmproj": "mmproj-model-f16.gguf",
194
175
  "recipe": "llamacpp",
195
- "reasoning": false,
196
176
  "suggested": true,
197
177
  "labels": ["vision"]
198
178
  },
@@ -200,8 +180,38 @@
200
180
  "checkpoint": "ggml-org/Qwen2.5-VL-7B-Instruct-GGUF:Q4_K_M",
201
181
  "mmproj": "mmproj-Qwen2.5-VL-7B-Instruct-f16.gguf",
202
182
  "recipe": "llamacpp",
203
- "reasoning": false,
204
183
  "suggested": true,
205
184
  "labels": ["vision"]
185
+ },
186
+ "Llama-4-Scout-17B-16E-Instruct-GGUF": {
187
+ "checkpoint": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_S",
188
+ "mmproj": "mmproj-F16.gguf",
189
+ "recipe": "llamacpp",
190
+ "suggested": true,
191
+ "labels": ["vision"]
192
+ },
193
+ "nomic-embed-text-v1-GGUF": {
194
+ "checkpoint": "nomic-ai/nomic-embed-text-v1-GGUF:Q4_K_S",
195
+ "recipe": "llamacpp",
196
+ "suggested": true,
197
+ "labels": ["embeddings"]
198
+ },
199
+ "nomic-embed-text-v2-moe-GGUF": {
200
+ "checkpoint": "nomic-ai/nomic-embed-text-v2-moe-GGUF:Q8_0",
201
+ "recipe": "llamacpp",
202
+ "suggested": true,
203
+ "labels": ["embeddings"]
204
+ },
205
+ "bge-reranker-v2-m3-GGUF": {
206
+ "checkpoint": "pqnet/bge-reranker-v2-m3-Q8_0-GGUF",
207
+ "recipe": "llamacpp",
208
+ "suggested": true,
209
+ "labels": ["reranking"]
210
+ },
211
+ "jina-reranker-v1-tiny-en-GGUF": {
212
+ "checkpoint": "mradermacher/jina-reranker-v1-tiny-en-GGUF:Q8_0",
213
+ "recipe": "llamacpp",
214
+ "suggested": false,
215
+ "labels": ["reranking"]
206
216
  }
207
217
  }