lemonade-sdk 8.0.6__py3-none-any.whl → 8.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

lemonade/tools/server/serve.py CHANGED
@@ -284,7 +284,7 @@ class Server(ManagementTool):
     def _setup_server_common(
         self,
         port: int,
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
         log_level: str = DEFAULT_LOG_LEVEL,
         tray: bool = False,
         log_file: str = None,
@@ -295,7 +295,7 @@ class Server(ManagementTool):
 
         Args:
            port: Port number for the server
-           truncate_inputs: Whether to truncate inputs if they exceed max length
+           truncate_inputs: Truncate messages to this length
            log_level: Logging level to configure
            threaded_mode: Whether this is being set up for threaded execution
        """
@@ -372,7 +372,7 @@ class Server(ManagementTool):
         _=None,
         port: int = DEFAULT_PORT,
         log_level: str = DEFAULT_LOG_LEVEL,
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
         tray: bool = False,
         log_file: str = None,
     ):
@@ -393,7 +393,7 @@ class Server(ManagementTool):
         port: int = DEFAULT_PORT,
         host: str = "localhost",
         log_level: str = "warning",
-        truncate_inputs: bool = False,
+        truncate_inputs: Optional[int] = None,
     ):
        """
        Set up the server for running in a thread.
@@ -1099,29 +1099,20 @@ class Server(ManagementTool):
             )
             self.input_tokens = len(input_ids[0])
 
-            if (
-                self.llm_loaded.max_prompt_length
-                and self.input_tokens > self.llm_loaded.max_prompt_length
-            ):
-                if self.truncate_inputs:
-                    # Truncate input ids
-                    truncate_amount = self.input_tokens - self.llm_loaded.max_prompt_length
-                    input_ids = input_ids[: self.llm_loaded.max_prompt_length]
-
-                    # Update token count
-                    self.input_tokens = len(input_ids)
-
-                    # Show warning message
-                    truncation_message = (
-                        f"Input exceeded {self.llm_loaded.max_prompt_length} tokens. "
-                        f"Truncated {truncate_amount} tokens."
-                    )
-                    logging.warning(truncation_message)
-                else:
-                    raise RuntimeError(
-                        f"Prompt tokens ({self.input_tokens}) cannot be greater "
-                        f"than the model's max prompt length ({self.llm_loaded.max_prompt_length})"
-                    )
+            if self.truncate_inputs and self.truncate_inputs > self.input_tokens:
+                # Truncate input ids
+                truncate_amount = self.input_tokens - self.truncate_inputs
+                input_ids = input_ids[: self.truncate_inputs]
+
+                # Update token count
+                self.input_tokens = len(input_ids)
+
+                # Show warning message
+                truncation_message = (
+                    f"Input exceeded {self.truncate_inputs} tokens. "
+                    f"Truncated {truncate_amount} tokens."
+                )
+                logging.warning(truncation_message)
 
             # Log the input tokens early to avoid this not showing due to potential crashes
             logging.debug(f"Input Tokens: {self.input_tokens}")
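The net effect of this hunk is that truncation is now keyed to the server-wide `truncate_inputs` token budget rather than the per-model `max_prompt_length`, and the RuntimeError path is gone. Below is a minimal standalone sketch of budget-based prompt truncation, assuming the intent is to shorten prompts that exceed the configured budget; it is an illustration, not the package's actual method.

```python
import logging
from typing import List, Optional


def truncate_to_budget(input_ids: List[int], budget: Optional[int]) -> List[int]:
    """Illustrative only: trim a token id list to an optional token budget.

    A budget of None (the new default for truncate_inputs) leaves the prompt untouched.
    """
    if budget and len(input_ids) > budget:
        dropped = len(input_ids) - budget
        logging.warning("Input exceeded %d tokens. Truncated %d tokens.", budget, dropped)
        return input_ids[:budget]
    return input_ids
```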
@@ -1317,7 +1308,7 @@ class Server(ManagementTool):
             self.tokenizer = None
             self.model = None
 
-            default_message = f"model {model_reference} not found"
+            default_message = "see stack trace and error message below"
             if message:
                 detail = message
             else:
lemonade/tools/server/static/styles.css CHANGED
@@ -27,7 +27,6 @@ body {
     min-height: 100vh;
     display: flex;
     flex-direction: column;
-    padding-bottom: 5rem;
 }
 
 body::before {
@@ -102,13 +101,9 @@ body::before {
 }
 
 .site-footer {
-    position: fixed;
-    left: 0;
-    bottom: 0;
-    width: 100%;
     background: transparent;
     padding-top: 0.5rem;
-    z-index: 100;
+    margin-top: auto;
 }
 
 .dad-joke {
@@ -535,6 +530,10 @@ body::before {
     background-color: #ca4747;
 }
 
+.model-label.coding {
+    background-color: #ff6b35;
+}
+
 .model-labels-container {
     display: flex;
     align-items: center;
lemonade/tools/server/static/webapp.html CHANGED
@@ -109,6 +109,7 @@
             </label>
             <select id="register-recipe" name="recipe" required>
                 <option value="llamacpp">llamacpp</option>
+                <option value="oga-npu">oga-npu</option>
                 <option value="oga-hybrid">oga-hybrid</option>
                 <option value="oga-cpu">oga-cpu</option>
             </select>
@@ -413,6 +414,8 @@
                 labelClass = 'reasoning';
             } else if (labelLower === 'reranking') {
                 labelClass = 'reranking';
+            } else if (labelLower === 'coding') {
+                labelClass = 'coding';
             }
             labelSpan.className = `model-label ${labelClass}`;
             labelSpan.textContent = label;
lemonade/version.py CHANGED
@@ -1 +1 @@
-__version__ = "8.0.6"
+__version__ = "8.1.0"
lemonade_install/install.py CHANGED
@@ -24,22 +24,6 @@
 # In any python environment, only one set of artifacts can be installed at a time.
 # Python environments created by Lemonade v6.1.x or earlier will need to be recreated.
 #
-# The Ryzen AI 1.3.0 artifact files use a different directory hierarchy.
-# The Ryzen AI 1.3.0 hybrid artifacts directory hierarchy is:
-#
-# RYZEN_AI\hybrid\hybrid-llm-artifacts_1.3.0_lounge\hybrid-llm-artifacts\
-#     onnxruntime_genai\lib
-#     onnxruntime_genai\wheel
-#     onnx_utils\bin
-#     eula\eula
-#
-# The Ryzen AI 1.3.0 npu artifacts directory hierarchy is:
-#
-# RYZEN_AI\npu\amd_oga\
-#     bins\xclbin\stx
-#     libs
-#     wheels
-#
 
 import argparse
 import glob
@@ -56,6 +40,13 @@ import zipfile
 DEFAULT_RYZEN_AI_VERSION = "1.4.0"
 version_info_filename = "version_info.json"
 
+# NPU Driver configuration
+NPU_DRIVER_DOWNLOAD_URL = (
+    "https://account.amd.com/en/forms/downloads/"
+    "ryzenai-eula-public-xef.html?filename=NPU_RAI1.5_280_WHQL.zip"
+)
+REQUIRED_NPU_DRIVER_VERSION = "32.0.203.280"
+
 lemonade_install_dir = Path(__file__).parent.parent.parent
 DEFAULT_QUARK_VERSION = "quark-0.6.0"
 DEFAULT_QUARK_DIR = os.path.join(
@@ -66,14 +57,6 @@ DEFAULT_QUARK_DIR = os.path.join(
 SUPPORTED_RYZEN_AI_SERIES = ["300"]
 
 npu_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": "ryzen_ai_13_ga/npu-llm-artifacts_1.3.0.zip",
-        "license_file": (
-            "https://account.amd.com/content/dam/account/en/licenses/download/"
-            "amd-end-user-license-agreement.pdf"
-        ),
-        "license_tag": "Beta ",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -88,17 +71,6 @@ npu_install_data = {
 }
 
 hybrid_install_data = {
-    "1.3.0": {
-        "artifacts_zipfile": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=hybrid-llm-artifacts_1.3.0_012725.zip"
-        ),
-        "license_file": (
-            "https://www.xilinx.com/bin/public/openDownload?"
-            "filename=AMD%20End%20User%20License%20Agreement.pdf"
-        ),
-        "license_tag": "",
-    },
     "1.4.0": {
         "artifacts_zipfile": (
             "https://www.xilinx.com/bin/public/openDownload?"
@@ -154,10 +126,7 @@ def get_oga_npu_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu", "amd_oga")
-    else:
-        npu_dir = os.path.join(ryzen_ai_folder, "npu")
+    npu_dir = os.path.join(ryzen_ai_folder, "npu")
     if not os.path.isdir(npu_dir):
         raise RuntimeError(
             f"The npu artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -171,15 +140,7 @@ def get_oga_hybrid_dir():
     version_info = get_ryzen_ai_version_info()
     version = version_info["version"]
     ryzen_ai_folder = get_ryzen_ai_path()
-    if "1.3.0" in version:
-        hybrid_dir = os.path.join(
-            ryzen_ai_folder,
-            "hybrid",
-            "hybrid-llm-artifacts_1.3.0_lounge",
-            "hybrid-llm-artifacts",
-        )
-    else:
-        hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
+    hybrid_dir = os.path.join(ryzen_ai_folder, "hybrid")
     if not os.path.isdir(hybrid_dir):
         raise RuntimeError(
             f"The hybrid artifacts are missing from the Ryzen AI folder {ryzen_ai_folder}. "
@@ -189,6 +150,37 @@
     return hybrid_dir, version
 
 
+def _get_ryzenai_version_info(device=None):
+    """
+    Centralized version detection for RyzenAI installations.
+    Uses lazy imports to avoid import errors when OGA is not installed.
+    """
+    try:
+        # Lazy import to avoid errors when OGA is not installed
+        from packaging.version import Version
+        import onnxruntime_genai as og
+
+        if Version(og.__version__) >= Version("0.7.0"):
+            oga_path = os.path.dirname(og.__file__)
+            if og.__version__ == "0.7.0.2":
+                return "1.5.0", oga_path
+            else:
+                return "1.4.0", oga_path
+        else:
+            if device == "npu":
+                oga_path, version = get_oga_npu_dir()
+            else:
+                oga_path, version = get_oga_hybrid_dir()
+            return version, oga_path
+    except ImportError as e:
+        raise ImportError(
+            f"{e}\n Please install lemonade-sdk with "
+            "one of the oga extras, for example:\n"
+            "pip install lemonade-sdk[dev,oga-cpu]\n"
+            "See https://lemonade_server.ai/install_options.html for details"
+        ) from e
+
+
 def download_lfs_file(token, file, output_filename):
     """Downloads a file from LFS"""
     import requests
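A brief usage sketch for the new helper, based only on the signature and return values visible in the hunk above; the import path and call site are assumptions, and the function is a private helper of the installer.

```python
# Hypothetical call site: resolve the installed Ryzen AI version and the
# onnxruntime-genai artifact path for an NPU device. For OGA >= 0.7.0 the
# helper returns the installed wheel's directory; otherwise it falls back to
# the legacy get_oga_npu_dir() / get_oga_hybrid_dir() lookups.
from lemonade_install.install import _get_ryzenai_version_info

version, oga_path = _get_ryzenai_version_info(device="npu")
print(f"Ryzen AI {version} artifacts at {oga_path}")
```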
@@ -426,8 +418,6 @@ class Install:
             "npu",
             "hybrid",
             "unified",
-            "npu-1.3.0",
-            "hybrid-1.3.0",
             "npu-1.4.0",
             "hybrid-1.4.0",
             "unified-1.4.0",
@@ -524,25 +514,14 @@
         # Install all whl files in the specified wheels folder
         if wheels_full_path is not None:
             print(f"\nInstalling wheels from {wheels_full_path}\n")
-            if version == "1.3.0":
-                # Install one wheel file at a time (1.3.0 npu build only works this way)
-                for file in os.listdir(wheels_full_path):
-                    if file.endswith(".whl"):
-                        install_cmd = (
-                            f"{sys.executable} -m pip install "
-                            f"{os.path.join(wheels_full_path, file)}"
-                        )
-                        print(f"\nInstalling {file} with command {install_cmd}\n")
-                        subprocess.run(install_cmd, check=True, shell=True)
-            else:
-                # Install all the wheel files together, allowing pip to work out the dependencies
-                wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
-                install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
-                subprocess.run(
-                    install_cmd,
-                    check=True,
-                    shell=True,
-                )
+            # Install all the wheel files together, allowing pip to work out the dependencies
+            wheel_files = glob.glob(os.path.join(wheels_full_path, "*.whl"))
+            install_cmd = [sys.executable, "-m", "pip", "install"] + wheel_files
+            subprocess.run(
+                install_cmd,
+                check=True,
+                shell=True,
+            )
 
         # Delete the zip file
         print(f"\nCleaning up, removing {archive_file_path}\n")
@@ -611,10 +590,7 @@ class Install:
         license_file = npu_install_data[version].get("license_file", None)
         license_tag = npu_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "npu")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(install_dir, "amd_oga/wheels")
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
 
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -641,17 +617,7 @@ class Install:
         license_file = hybrid_install_data[version].get("license_file", None)
         license_tag = hybrid_install_data[version].get("license_tag", None)
         install_dir = os.path.join(ryzen_ai_folder, "hybrid")
-        if version == "1.3.0":
-            wheels_full_path = os.path.join(
-                ryzen_ai_folder,
-                "hybrid",
-                "hybrid-llm-artifacts_1.3.0_lounge",
-                "hybrid-llm-artifacts",
-                "onnxruntime_genai",
-                "wheel",
-            )
-        else:
-            wheels_full_path = os.path.join(install_dir, "wheels")
+        wheels_full_path = os.path.join(install_dir, "wheels")
 
         if license_file:
             Install._get_license_acceptance(version, license_file, license_tag, yes)
@@ -669,6 +635,21 @@ class Install:
         # Check if the processor is supported before proceeding
         check_ryzen_ai_processor()
 
+        warning_msg = (
+            "\n" + "=" * 80 + "\n"
+            "WARNING: IMPORTANT: NEW RYZEN AI 1.5.0 INSTALLATION PROCESS\n"
+            + "=" * 80
+            + "\n"
+            "Starting with Ryzen AI 1.5.0, installation is now available through PyPI.\n"
+            "For new installations, consider using:\n\n"
+            "pip install lemonade-sdk[oga-ryzenai] --extra-index-url https://pypi.amd.com/simple\n\n"
+            "This legacy installation method (lemonade-install --ryzenai) is still\n"
+            "supported for version 1.4.0, but may be deprecated in future releases.\n"
+            + "=" * 80
+            + "\n"
+        )
+        print(warning_msg)
+
         # Delete any previous Ryzen AI installation in this environment
         ryzen_ai_folder = get_ryzen_ai_path(check_exists=False)
         if os.path.exists(ryzen_ai_folder):
lemonade_sdk-8.0.6.dist-info/METADATA → lemonade_sdk-8.1.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lemonade-sdk
-Version: 8.0.6
+Version: 8.1.0
 Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
 Author-email: lemonade@amd.com
 Requires-Python: >=3.10, <3.13
@@ -22,16 +22,15 @@ Requires-Dist: pytz
 Requires-Dist: zstandard
 Requires-Dist: fastapi
 Requires-Dist: uvicorn[standard]
-Requires-Dist: openai>=1.81.0
+Requires-Dist: openai<1.97.1,>=1.81.0
 Requires-Dist: transformers<=4.53.2
 Requires-Dist: jinja2
 Requires-Dist: tabulate
 Requires-Dist: sentencepiece
 Requires-Dist: huggingface-hub==0.33.0
-Provides-Extra: oga-hybrid
-Requires-Dist: onnx==1.16.1; extra == "oga-hybrid"
-Requires-Dist: numpy==1.26.4; extra == "oga-hybrid"
-Requires-Dist: protobuf>=6.30.1; extra == "oga-hybrid"
+Provides-Extra: oga-ryzenai
+Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2; extra == "oga-ryzenai"
+Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
 Provides-Extra: oga-cpu
 Requires-Dist: onnxruntime-genai==0.8.2; extra == "oga-cpu"
 Requires-Dist: onnxruntime>=1.22.0; extra == "oga-cpu"
@@ -43,14 +42,32 @@ Requires-Dist: pandas>=1.5.3; extra == "dev"
 Requires-Dist: matplotlib; extra == "dev"
 Requires-Dist: human-eval-windows==1.0.4; extra == "dev"
 Requires-Dist: lm-eval[api]; extra == "dev"
+Provides-Extra: oga-hybrid
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid"
+Provides-Extra: oga-unified
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-unified"
 Provides-Extra: oga-hybrid-minimal
-Requires-Dist: lemonade-sdk[oga-hybrid]; extra == "oga-hybrid-minimal"
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-hybrid-minimal"
 Provides-Extra: oga-cpu-minimal
 Requires-Dist: lemonade-sdk[oga-cpu]; extra == "oga-cpu-minimal"
+Provides-Extra: oga-npu-minimal
+Requires-Dist: lemonade-sdk[oga-ryzenai]; extra == "oga-npu-minimal"
 Provides-Extra: llm
 Requires-Dist: lemonade-sdk[dev]; extra == "llm"
 Provides-Extra: llm-oga-cpu
 Requires-Dist: lemonade-sdk[dev,oga-cpu]; extra == "llm-oga-cpu"
+Provides-Extra: llm-oga-npu
+Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
+Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
+Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
+Provides-Extra: llm-oga-hybrid
+Requires-Dist: onnx==1.16.1; extra == "llm-oga-hybrid"
+Requires-Dist: numpy==1.26.4; extra == "llm-oga-hybrid"
+Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-hybrid"
+Provides-Extra: llm-oga-unified
+Requires-Dist: lemonade-sdk[dev,llm-oga-hybrid]; extra == "llm-oga-unified"
 Provides-Extra: llm-oga-igpu
 Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
 Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
@@ -61,16 +78,6 @@ Requires-Dist: onnxruntime-genai-cuda==0.8.2; extra == "llm-oga-cuda"
 Requires-Dist: onnxruntime-gpu>=1.22.0; extra == "llm-oga-cuda"
 Requires-Dist: transformers<=4.51.3; extra == "llm-oga-cuda"
 Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-cuda"
-Provides-Extra: llm-oga-npu
-Requires-Dist: onnx==1.16.0; extra == "llm-oga-npu"
-Requires-Dist: onnxruntime==1.18.0; extra == "llm-oga-npu"
-Requires-Dist: numpy==1.26.4; extra == "llm-oga-npu"
-Requires-Dist: protobuf>=6.30.1; extra == "llm-oga-npu"
-Requires-Dist: lemonade-sdk[dev]; extra == "llm-oga-npu"
-Provides-Extra: llm-oga-hybrid
-Requires-Dist: lemonade-sdk[dev,oga-hybrid]; extra == "llm-oga-hybrid"
-Provides-Extra: llm-oga-unified
-Requires-Dist: lemonade-sdk[llm-oga-hybrid]; extra == "llm-oga-unified"
 Dynamic: author-email
 Dynamic: description
 Dynamic: description-content-type
@@ -174,7 +181,7 @@ lemonade-server list
 
 ## Model Library
 
-Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/models/).
+Lemonade supports both GGUF and ONNX models as detailed in the [Supported Configuration](#supported-configurations) section. A list of all built-in models is available [here](https://lemonade-server.ai/docs/server/server_models/).
 
 You can also import custom GGUF and ONNX models from Hugging Face by using our [Model Manager](http://localhost:8000/#model-management) (requires server to be running).
 <p align="center">
@@ -263,7 +270,7 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
-For more detailed integration instructions, see the [Integration Guide](./server_integration.md).
+For more detailed integration instructions, see the [Integration Guide](./docs/server/server_integration.md).
 
 ## Beyond an LLM Server
 
@@ -272,6 +279,10 @@ The [Lemonade SDK](./docs/README.md) also include the following components:
 - 🐍 **[Lemonade API](./docs/lemonade_api.md)**: High-level Python API to directly integrate Lemonade LLMs into Python applications.
 - 🖥️ **[Lemonade CLI](./docs/dev_cli/README.md)**: The `lemonade` CLI lets you mix-and-match LLMs (ONNX, GGUF, SafeTensors) with prompting templates, accuracy testing, performance benchmarking, and memory profiling to characterize your models on your hardware.
 
+## FAQ
+
+To read our frequently asked questions, see our [FAQ Guide](./docs/faq.md)
+
 ## Contributing
 
 We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
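For reference, the client code excerpted in the `@@ -263,7 +270,7 @@` hunk above comes from the README's OpenAI-compatible usage example. A self-contained version might look like the following; the base URL, API key placeholder, and model name are assumptions rather than values taken from this diff.

```python
from openai import OpenAI

# Assumed defaults: Lemonade Server listening on localhost:8000 with an
# OpenAI-compatible endpoint; the model name below is a placeholder.
client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="lemonade")

completion = client.chat.completions.create(
    model="Llama-3.2-1B-Instruct-Hybrid",  # placeholder model name
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```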
lemonade_sdk-8.0.6.dist-info/RECORD → lemonade_sdk-8.1.0.dist-info/RECORD CHANGED
@@ -4,14 +4,14 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
 lemonade/cli.py,sha256=9Pcs3PcrWC2F8_pcBaz09xHUICIJTvpemBdPGyXkjIk,4395
 lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
 lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
-lemonade/version.py,sha256=0UVFH05U_SSITAGCT0SSjcWSJnwORBaDn5ZSlvquMo8,22
+lemonade/version.py,sha256=c04nFsyfS0zYoDvZjLO-uEi12TFB5EWSD6fiWiI7OLQ,22
 lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
 lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
 lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
 lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
 lemonade/common/inference_engines.py,sha256=lcmir_pATr71TfSBJoIZEi3G9xyxNwi2_xpPvPD8_xI,12932
-lemonade/common/network.py,sha256=EkWTxjJ-UM469nEJdzEl-5UNO7dPfVfFzU2SVMMPaD0,1425
+lemonade/common/network.py,sha256=p1lWJkN0H5hCpb4rKi3Zc47W_BRrrm-7ghdTALJLGqU,1944
 lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
 lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
 lemonade/common/system_info.py,sha256=dOtX8WLHCz1xmURZWnqhDbyNZv_AulrpX_bbI58eHFQ,27084
@@ -32,13 +32,13 @@ lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
 lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
 lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
 lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
-lemonade/tools/llamacpp/bench.py,sha256=GBUGOrcDUJdREAIM7GGs4VschqUe-mE_1-MbSUaDjic,4776
+lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
 lemonade/tools/llamacpp/load.py,sha256=SKacK2n8LpC4DN4yALyEpV2c8_sgOv2G7t6Nlyu7XXg,6273
-lemonade/tools/llamacpp/utils.py,sha256=35eir8PKtxVUDehgb2DJ9hUI0uSjijQgoDK6scmLl1E,22390
+lemonade/tools/llamacpp/utils.py,sha256=vHA5kykkdHSsMGmbEA4RyOHr8wFIh1WenfhCvY8WxZs,22445
 lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
-lemonade/tools/oga/load.py,sha256=XSznW8lOX_KafSq5J5mIBJzj8YJEBpK0RFGcTE1wnE8,28317
-lemonade/tools/oga/utils.py,sha256=2N1htWM8QKp5g8nHPvk-w9ZYknSc6fWGXcACRkhsXic,16465
+lemonade/tools/oga/load.py,sha256=O82ezF7Jhgz3CJrxDWZYqLHyD_0NS1nsvfMWDaaUI4I,33728
+lemonade/tools/oga/utils.py,sha256=Xd7tmNr69u_bCut0hZqA7saUR3NFZlp4bvWo54mOZb0,16918
 lemonade/tools/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/quark/quark_load.py,sha256=FJ4LJKTToZbHHWVEOBLadae1a3jCnnY4KvXySHbkJMA,5589
 lemonade/tools/quark/quark_quantize.py,sha256=hwoaXhpBIORvJ16MvewphPkaDEQn3BAgXq5o82Gc-_s,16599
@@ -47,26 +47,26 @@ lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTgu
 lemonade/tools/report/table.py,sha256=ssqy1bZqF-wptNzKEOj6_9REtCNZyXO8R5vakAtg3R4,27973
 lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lemonade/tools/server/llamacpp.py,sha256=OP0j74QcowEu3zFEcrKIsBbGDOFemBXS5F5DC6oQHaI,18853
-lemonade/tools/server/serve.py,sha256=SXc0qSh-jKS72GlUsuksT7Lov8p3FatgbbycNmHsUfM,57465
+lemonade/tools/server/serve.py,sha256=0-NprfsU-YrX8Qsf1atEi6wPJWemrPjHKEBHV69SwCQ,57046
 lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
 lemonade/tools/server/tray.py,sha256=yoGCM8j_2KzPqo-AlYiauWd8QR56yp6jW6HZ9921Ydg,17525
 lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
 lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
-lemonade/tools/server/static/styles.css,sha256=JmH9LRGB4HGNCxH14owBrUNNBlzx3cVvB3JJ-xJqDqc,26453
-lemonade/tools/server/static/webapp.html,sha256=8khNmsiy4UdjJDkJW3cFeJkmXUR8RQucvCuuka5SNrQ,36230
+lemonade/tools/server/static/styles.css,sha256=8wQ5Cg4rbEh03kC8t7ALE7dB20GiD0Pfu5BAxh9hECU,26429
+lemonade/tools/server/static/webapp.html,sha256=KZm1ZFIhQzLT2Y2wy3hFsQxcOxFzv-blaeLzc1ODhb8,36396
 lemonade/tools/server/utils/port.py,sha256=XnIg2qS73QRrsJn6LgHcrJPmku30Tv6vsYcBVMj82K4,2186
 lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
 lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
 lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
-lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
-lemonade_sdk-8.0.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lemonade_sdk-8.0.6.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
-lemonade_server/cli.py,sha256=2Un5uLK04fIxlfcTiZ0T_EWbbaq2tYymkUHNFeuvB7g,16041
-lemonade_server/model_manager.py,sha256=FfF3z4IpMZqMk_yIo2LHiE76xg7ybROHvi6lcx-0gvE,10754
-lemonade_server/pydantic_models.py,sha256=rp_FFhoTwg6jNmgol-kShwffnRDGbt7jTbIeELvgOIo,2876
-lemonade_server/server_models.json,sha256=wVyjusS5KkOhlQIl1tCnTR4YYbVm7mLU2rHSFk_39hI,7890
-lemonade_sdk-8.0.6.dist-info/METADATA,sha256=g7dOWZPRb0PEyK4UZpVBPnm1LGfZGlkjrdcNfJ_DO_g,15230
-lemonade_sdk-8.0.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lemonade_sdk-8.0.6.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
-lemonade_sdk-8.0.6.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
-lemonade_sdk-8.0.6.dist-info/RECORD,,
+lemonade_install/install.py,sha256=TBX-VwEHcPo4WX0K_12pKKINnIK3o4SUo3L5XjkqEtw,27669
+lemonade_sdk-8.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lemonade_sdk-8.1.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
+lemonade_server/cli.py,sha256=6QJ5fxNLuVUbuHauA5JHXf0H5dqJ5E4GNTo4YoMOJtg,16049
+lemonade_server/model_manager.py,sha256=O3fIX52AqU0z10WzPmNEA3lQ_KjOqNq_G-SxjwIgEio,10781
+lemonade_server/pydantic_models.py,sha256=qEvF7x7AuHCHMiByVzGGuLdQTNs233Sw9uQq5cpI6is,2721
+lemonade_server/server_models.json,sha256=gitKHj_VHANxjtcXeE5zFpukVO0HyEfKhu3ZaZsj2xo,8867
+lemonade_sdk-8.1.0.dist-info/METADATA,sha256=c3JxCUYw5ujhGSb3FX3mG6UmgG5BLqik8a5j4oe8n7o,15712
+lemonade_sdk-8.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lemonade_sdk-8.1.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
+lemonade_sdk-8.1.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
+lemonade_sdk-8.1.0.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -57,7 +57,7 @@ def serve(
     log_level = log_level if log_level is not None else DEFAULT_LOG_LEVEL
 
     # Hidden environment variable to enable input truncation (experimental feature)
-    truncate_inputs = "LEMONADE_TRUNCATE_INPUTS" in os.environ
+    truncate_inputs = os.environ.get("LEMONADE_TRUNCATE_INPUTS", None)
 
     # Start the server
     serve_kwargs = {
lemonade_server/model_manager.py CHANGED
@@ -7,6 +7,7 @@ from importlib.metadata import distributions
 from lemonade_server.pydantic_models import PullConfig
 from lemonade.cache import DEFAULT_CACHE_DIR
 from lemonade.tools.llamacpp.utils import parse_checkpoint, download_gguf
+from lemonade.common.network import custom_snapshot_download
 
 USER_MODELS_FILE = os.path.join(DEFAULT_CACHE_DIR, "user_models.json")
 
@@ -175,7 +176,7 @@ class ModelManager:
         if "gguf" in checkpoint_to_download.lower():
             download_gguf(gguf_model_config.checkpoint, gguf_model_config.mmproj)
         else:
-            huggingface_hub.snapshot_download(repo_id=checkpoint_to_download)
+            custom_snapshot_download(checkpoint_to_download)
 
         # Register the model in user_models.json, creating that file if needed
         # We do this registration after the download so that we don't register
@@ -233,8 +234,8 @@ class ModelManager:
 
         try:
             # Get the local path using snapshot_download with local_files_only=True
-            snapshot_path = huggingface_hub.snapshot_download(
-                repo_id=base_checkpoint, local_files_only=True
+            snapshot_path = custom_snapshot_download(
+                base_checkpoint, local_files_only=True
             )
 
             # Navigate up to the model directory (parent of snapshots directory)
lemonade_server/pydantic_models.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional, Union, List, Any
+from typing import Optional, Union, List
 
 from pydantic import BaseModel
 
@@ -18,9 +18,6 @@ class LoadConfig(BaseModel):
     model_name: str
     checkpoint: Optional[str] = None
     recipe: Optional[str] = None
-    # Indicates the maximum prompt length allowed for that specific
-    # checkpoint + recipe combination
-    max_prompt_length: Optional[int] = None
     # Indicates whether the model is a reasoning model, like DeepSeek
     reasoning: Optional[bool] = False
     # Indicates which Multimodal Projector (mmproj) file to use
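With `max_prompt_length` removed, a LoadConfig now carries only model identity and recipe information. A minimal construction sketch using the fields visible in this hunk; the values are placeholders and other optional fields of the model are omitted.

```python
from lemonade_server.pydantic_models import LoadConfig

# max_prompt_length is gone in 8.1.0; prompt truncation is now configured on the
# server via truncate_inputs instead of per checkpoint + recipe combination.
config = LoadConfig(
    model_name="My-Model",                   # placeholder
    checkpoint="example-org/example-model",  # placeholder checkpoint
    recipe="oga-cpu",                        # one of the recipes listed in the web app hunk
    reasoning=False,
)
print(config)
```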