lemonade-sdk 7.0.0__py3-none-any.whl → 7.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

@@ -0,0 +1,313 @@
1
+ body {
2
+ margin: 0;
3
+ font-family: 'Segoe UI', 'Arial', sans-serif;
4
+ background: #fffbe9;
5
+ color: #222;
6
+ min-height: 100vh;
7
+ display: flex;
8
+ flex-direction: column;
9
+ padding-bottom: 5rem;
10
+ }
11
+
12
+ .navbar {
13
+ display: flex;
14
+ justify-content: center;
15
+ gap: 2.5rem;
16
+ padding: 2rem 0 1.5rem 0;
17
+ font-size: 1.25rem;
18
+ font-weight: 500;
19
+ background: transparent;
20
+ letter-spacing: 0.02em;
21
+ }
22
+
23
+ .navbar a {
24
+ color: #444;
25
+ text-decoration: none;
26
+ transition: color 0.2s;
27
+ }
28
+
29
+ .navbar a:hover {
30
+ color: #e6b800;
31
+ }
32
+
33
+ .main {
34
+ flex: 1;
35
+ display: flex;
36
+ flex-direction: column;
37
+ align-items: center;
38
+ justify-content: flex-start;
39
+ min-height: 60vh;
40
+ margin-top: 3rem;
41
+ }
42
+
43
+ .title {
44
+ font-size: 3rem;
45
+ font-weight: 700;
46
+ margin-bottom: 2.5rem;
47
+ letter-spacing: 0.01em;
48
+ text-align: center;
49
+ color: #222;
50
+ }
51
+
52
+ .site-footer {
53
+ position: fixed;
54
+ left: 0;
55
+ bottom: 0;
56
+ width: 100%;
57
+ background-color: #fffbe9;
58
+ padding-top: 0.5rem;
59
+ z-index: 100;
60
+ }
61
+
62
+ .dad-joke {
63
+ color: #4ca64c;
64
+ font-size: 1.12rem;
65
+ text-align: center;
66
+ margin-bottom: 0.5rem;
67
+ opacity: 0.98;
68
+ letter-spacing: 0.01em;
69
+ padding: 0.2em 0;
70
+ width: fit-content;
71
+ margin-left: auto;
72
+ margin-right: auto;
73
+ background: none;
74
+ border-radius: 0;
75
+ display: block;
76
+ }
77
+
78
+ .copyright {
79
+ text-align: center;
80
+ font-size: 0.95rem;
81
+ color: #aaa;
82
+ margin-bottom: 0.5rem;
83
+ }
84
+
85
+ /* Tab and Chat UI Styling */
86
+ .tab-container {
87
+ background: #fff;
88
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
89
+ margin-bottom: 2em;
90
+ border-radius: 8px;
91
+ border: 1px solid #e0e0e0;
92
+ max-width: 900px;
93
+ width: 100%;
94
+ }
95
+
96
+ .tabs {
97
+ display: flex;
98
+ border-bottom: 1px solid #e0e0e0;
99
+ }
100
+
101
+ .tab {
102
+ padding: 1em 2em;
103
+ cursor: pointer;
104
+ border: none;
105
+ background: none;
106
+ font-size: 1.1em;
107
+ color: #666;
108
+ transition: color 0.2s;
109
+ }
110
+
111
+ .tab.active {
112
+ border-bottom: 2px solid #e6b800;
113
+ color: #e6b800;
114
+ font-weight: bold;
115
+ background: #fafafa;
116
+ }
117
+
118
+ .tab-content {
119
+ display: none;
120
+ padding: 2em;
121
+ background: #fafafa;
122
+ border-radius: 0 0 8px 8px;
123
+ }
124
+
125
+ .tab-content.active {
126
+ display: block;
127
+ }
128
+
129
+ /* Chat UI */
130
+ .chat-container {
131
+ display: flex;
132
+ flex-direction: column;
133
+ height: 400px;
134
+ max-width: 600px;
135
+ margin: 0 auto;
136
+ border: 1px solid #e0e0e0;
137
+ border-radius: 8px;
138
+ background: #fff;
139
+ }
140
+
141
+ .chat-history {
142
+ flex: 1;
143
+ overflow-y: auto;
144
+ padding: 1em;
145
+ border-bottom: 1px solid #e0e0e0;
146
+ display: flex;
147
+ flex-direction: column;
148
+ gap: 0.5em;
149
+ }
150
+
151
+ .chat-message {
152
+ display: flex;
153
+ flex-direction: column;
154
+ align-items: flex-end;
155
+ margin-bottom: 0.5em;
156
+ }
157
+
158
+ .chat-message.user {
159
+ align-items: flex-end;
160
+ }
161
+
162
+ .chat-message.llm {
163
+ align-items: flex-start;
164
+ }
165
+
166
+ .chat-bubble {
167
+ max-width: 70%;
168
+ padding: 0.7em 1.1em;
169
+ border-radius: 20px;
170
+ margin-bottom: 2px;
171
+ font-size: 1em;
172
+ word-break: break-word;
173
+ box-shadow: 0 1px 2px rgba(0,0,0,0.10);
174
+ line-height: 1.5;
175
+ }
176
+
177
+ .chat-bubble.user {
178
+ background: linear-gradient(135deg, #ffe066 60%, #ffd43b 100%);
179
+ color: #222;
180
+ border-bottom-right-radius: 4px;
181
+ align-self: flex-end;
182
+ }
183
+
184
+ .chat-bubble.llm {
185
+ background: #f0f0f0;
186
+ color: #222;
187
+ border-bottom-left-radius: 4px;
188
+ align-self: flex-start;
189
+ }
190
+
191
+ .chat-input-row {
192
+ display: flex;
193
+ gap: 0.5em;
194
+ padding: 1em;
195
+ background: #f9f9f9;
196
+ border-radius: 0 0 8px 8px;
197
+ }
198
+
199
+ .chat-input-row select {
200
+ min-width: 120px;
201
+ background: #fff;
202
+ color: #222;
203
+ border: 1px solid #ddd;
204
+ border-radius: 4px;
205
+ padding: 0.5em;
206
+ }
207
+
208
+ .chat-input-row input[type='text'] {
209
+ flex: 1;
210
+ padding: 0.5em;
211
+ border: 1px solid #ddd;
212
+ border-radius: 4px;
213
+ background: #fff;
214
+ color: #222;
215
+ }
216
+
217
+ .chat-input-row button {
218
+ padding: 0.5em 1.2em;
219
+ background: #e6b800;
220
+ color: #222;
221
+ border: none;
222
+ border-radius: 4px;
223
+ cursor: pointer;
224
+ transition: background 0.2s;
225
+ font-weight: 600;
226
+ }
227
+
228
+ .chat-input-row button:hover {
229
+ background: #d4a500;
230
+ }
231
+
232
+ .chat-input-row button:disabled {
233
+ background: #ccc;
234
+ color: #666;
235
+ cursor: not-allowed;
236
+ }
237
+
238
+ /* Model Management */
239
+ .model-mgmt-container {
240
+ display: flex;
241
+ gap: 2em;
242
+ align-items: flex-start;
243
+ }
244
+
245
+ .model-mgmt-pane {
246
+ flex: 1 1 0;
247
+ min-width: 0;
248
+ }
249
+
250
+ .model-mgmt-pane h3 {
251
+ margin-top: 0;
252
+ color: #222;
253
+ }
254
+
255
+ .model-table {
256
+ width: 100%;
257
+ border-collapse: collapse;
258
+ background: #fff;
259
+ border: 1px solid #ddd;
260
+ border-radius: 4px;
261
+ }
262
+
263
+ .model-table td {
264
+ padding: 0.5em 0.75em;
265
+ vertical-align: middle;
266
+ border-bottom: 1px solid #eee;
267
+ color: #222;
268
+ }
269
+
270
+ .model-table tr:last-child td {
271
+ border-bottom: none;
272
+ }
273
+
274
+ .model-table button {
275
+ background: #e6b800;
276
+ color: #222;
277
+ border: none;
278
+ border-radius: 4px;
279
+ padding: 0.3em 0.8em;
280
+ cursor: pointer;
281
+ font-weight: 600;
282
+ transition: background 0.2s;
283
+ }
284
+
285
+ .model-table button:hover {
286
+ background: #d4a500;
287
+ }
288
+
289
+ .installing-btn {
290
+ background: #ccc !important;
291
+ color: #666 !important;
292
+ font-weight: bold;
293
+ opacity: 1 !important;
294
+ border: 1px solid #999;
295
+ cursor: wait;
296
+ }
297
+
298
+ @media (max-width: 600px) {
299
+ .title {
300
+ font-size: 2rem;
301
+ }
302
+ .navbar {
303
+ font-size: 1rem;
304
+ gap: 1.2rem;
305
+ }
306
+ .main {
307
+ margin-top: 1rem;
308
+ }
309
+ .model-mgmt-container {
310
+ flex-direction: column;
311
+ gap: 1em;
312
+ }
313
+ }
@@ -1,10 +1,10 @@
1
1
  import re
2
- from typing import List, Dict
2
+ from typing import List, Dict, Pattern, Optional
3
3
  import logging
4
4
  import json
5
5
 
6
6
 
7
- def extract_code_block(text):
7
+ def extract_code_block(text: str) -> str:
8
8
  """
9
9
  Extracts the content inside triple backtick code blocks from a text.
10
10
 
@@ -54,67 +54,74 @@ def standardize_tool_call(tool_call: dict) -> dict | None:
54
54
  return standardized_tool_call
55
55
 
56
56
 
57
- def extract_tool_calls(
58
- text: str, added_tokens_decoder: List[str]
59
- ) -> tuple[List[Dict], str]:
57
+ def get_tool_call_pattern(added_tokens_decoder: List[str]) -> Optional[Pattern]:
60
58
  """
61
- Extracts tool calls from generated text based on tool calling identifiers.
62
-
63
- Args:
64
- text (str): The text output generated by the model.
65
- added_tokens_decoder (List[str]): The list of tokens in the tokenizer.added_tokens_decoder.
66
-
67
- Returns:
68
- tuple[List[Dict], str]: A tuple containing:
69
- - List[Dict]: A list of extracted tool call objects (raw JSON-like dicts)
70
- - str: The original text with tool calls removed
59
+ Extracts tool call pattern from the added tokens decoder.
71
60
  """
72
- matches = []
73
61
  special_tokens = [v.content for v in added_tokens_decoder.values()]
74
62
 
75
63
  # Pattern 1: <tool_call>...</tool_call> block
76
64
  # Sample model that uses this pattern: Qwen3-8B
77
65
  if "<tool_call>" in special_tokens and "</tool_call>" in special_tokens:
78
- tool_call_pattern = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
79
- matches = list(tool_call_pattern.finditer(text))
66
+ return re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
80
67
 
81
68
  # Pattern 2: [TOOL_CALLS] [ {...} ] block
82
69
  # Sample model that uses this pattern: Mistral-7B-Instruct-v0.3
83
70
  elif "[TOOL_CALLS]" in special_tokens:
84
- tool_call_pattern = re.compile(
85
- r"\[TOOL_CALLS\]\s*\[(.*?)\](?=\s*<|/?eos|$)", re.DOTALL
86
- )
87
- matches = list(tool_call_pattern.finditer(text))
71
+ return re.compile(r"\[TOOL_CALLS\]\s*\[(.*?)\](?=\s*<|/?eos|$)", re.DOTALL)
88
72
 
89
73
  else:
90
74
  logging.warning(
91
75
  "Tool calling identifiers were not found for the current model."
92
76
  )
77
+ return None
78
+
79
+
80
+ def extract_tool_calls(
81
+ text: str, tool_call_pattern: Optional[Pattern] = None
82
+ ) -> tuple[List[Dict], str]:
83
+ """
84
+ Extracts tool calls from generated text based on tool calling identifiers.
85
+
86
+ Args:
87
+ text (str): The text output generated by the model.
88
+ tool_call_pattern (Optional[Pattern]): The pattern to use to extract tool calls.
89
+
90
+ Returns:
91
+ tuple[List[Dict], str]: A tuple containing:
92
+ - List[Dict]: A list of extracted tool call objects (raw JSON-like dicts)
93
+ - str: The original text with tool calls removed
94
+ """
95
+
96
+ matches = []
97
+ if tool_call_pattern is not None:
98
+ matches = list(tool_call_pattern.finditer(text))
93
99
 
94
100
  # Some models don't use any tool calling identifiers.
95
101
  # Instead, tool calls are identified by only generating JSON content.
96
102
  # Sample model that uses this pattern: Llama-3.1-8B-Instruct
97
- try:
98
- # Remove the json for a code block if needed
99
- parsed_text = extract_code_block(text)
100
- json_tool_calls = json.loads(parsed_text)
101
-
102
- if isinstance(json_tool_calls, dict):
103
- json_tool_calls = [json_tool_calls]
104
-
105
- extracted_tool_calls = []
106
- for tool_call in json_tool_calls:
107
- # Return the tool call if all calls are valid
108
- standard_tool_call = standardize_tool_call(tool_call)
109
- if standard_tool_call is not None:
110
- extracted_tool_calls.append(standard_tool_call)
111
- else:
112
- return [], text
113
-
114
- return extracted_tool_calls, ""
115
-
116
- except json.JSONDecodeError:
117
- pass
103
+ else:
104
+ try:
105
+ # Remove the json for a code block if needed
106
+ parsed_text = extract_code_block(text)
107
+ json_tool_calls = json.loads(parsed_text)
108
+
109
+ if isinstance(json_tool_calls, dict):
110
+ json_tool_calls = [json_tool_calls]
111
+
112
+ extracted_tool_calls = []
113
+ for tool_call in json_tool_calls:
114
+ # Return the tool call if all calls are valid
115
+ standard_tool_call = standardize_tool_call(tool_call)
116
+ if standard_tool_call is not None:
117
+ extracted_tool_calls.append(standard_tool_call)
118
+ else:
119
+ return [], text
120
+
121
+ return extracted_tool_calls, ""
122
+
123
+ except json.JSONDecodeError:
124
+ pass
118
125
 
119
126
  # Process matches in reverse to avoid position shifting
120
127
  extracted_tool_calls = []
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "7.0.0"
1
+ __version__ = "7.0.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 7.0.0
3
+ Version: 7.0.1
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.12
@@ -25,8 +25,8 @@ Requires-Dist: matplotlib
25
25
  Requires-Dist: tabulate
26
26
  Requires-Dist: huggingface-hub==0.30.2
27
27
  Provides-Extra: llm
28
- Requires-Dist: torch>=2.0.0; extra == "llm"
29
- Requires-Dist: transformers; extra == "llm"
28
+ Requires-Dist: torch>=2.6.0; extra == "llm"
29
+ Requires-Dist: transformers<=4.51.3; extra == "llm"
30
30
  Requires-Dist: accelerate; extra == "llm"
31
31
  Requires-Dist: py-cpuinfo; extra == "llm"
32
32
  Requires-Dist: sentencepiece; extra == "llm"
@@ -34,23 +34,20 @@ Requires-Dist: datasets; extra == "llm"
34
34
  Requires-Dist: human-eval-windows==1.0.4; extra == "llm"
35
35
  Requires-Dist: fastapi; extra == "llm"
36
36
  Requires-Dist: uvicorn[standard]; extra == "llm"
37
- Requires-Dist: openai>=1.66.0; extra == "llm"
37
+ Requires-Dist: openai>=1.81.0; extra == "llm"
38
38
  Requires-Dist: lm-eval[api]; extra == "llm"
39
39
  Provides-Extra: llm-oga-cpu
40
40
  Requires-Dist: onnxruntime-genai==0.6.0; extra == "llm-oga-cpu"
41
41
  Requires-Dist: onnxruntime<1.22.0,>=1.10.1; extra == "llm-oga-cpu"
42
- Requires-Dist: torch<2.4,>=2.0.0; extra == "llm-oga-cpu"
43
42
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cpu"
44
43
  Provides-Extra: llm-oga-igpu
45
44
  Requires-Dist: onnxruntime-genai-directml==0.6.0; extra == "llm-oga-igpu"
46
45
  Requires-Dist: onnxruntime-directml<1.22.0,>=1.19.0; extra == "llm-oga-igpu"
47
- Requires-Dist: torch<2.4,>=2.0.0; extra == "llm-oga-igpu"
48
46
  Requires-Dist: transformers<4.45.0; extra == "llm-oga-igpu"
49
47
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-igpu"
50
48
  Provides-Extra: llm-oga-cuda
51
49
  Requires-Dist: onnxruntime-genai-cuda==0.6.0; extra == "llm-oga-cuda"
52
50
  Requires-Dist: onnxruntime-gpu<1.22.0,>=1.19.1; extra == "llm-oga-cuda"
53
- Requires-Dist: torch<2.4,>=2.0.0; extra == "llm-oga-cuda"
54
51
  Requires-Dist: transformers<4.45.0; extra == "llm-oga-cuda"
55
52
  Requires-Dist: lemonade-sdk[llm]; extra == "llm-oga-cuda"
56
53
  Provides-Extra: llm-oga-npu
@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
4
4
  lemonade/cli.py,sha256=_s-LWpaVIhOmaP0Q1qirXxNiBhdumAZ-5ub5-lRNccs,4351
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=VgMOOqsYbyb60I1RmlZpqwqQ0C0IyT3R0c8_xX4pRGM,22
7
+ lemonade/version.py,sha256=co6LyaBArt-ahHXYZSdSER8TFZ2vVTb86CNG6X8Pxwc,22
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/analyze_model.py,sha256=sYWDznEUEWjx_Qekg7f1hHY4Pfe87IQ77lmsWqePgE0,803
10
10
  lemonade/common/build.py,sha256=Pk86mCr6fyBIx2zXDpq0BkdahlCmWRnwSTpShA_gwZw,7849
@@ -45,17 +45,21 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
45
45
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
46
46
  lemonade/tools/report/table.py,sha256=a0TXo1X84RxCSu0un_XM3ANOlhLtPDuqtGwR7eomf2s,24853
47
47
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- lemonade/tools/server/serve.py,sha256=VCBeu-aVYC5BjEo5mzKIjsN48YKCNoxXgDggXxmRgy8,49941
49
- lemonade/tools/server/tool_calls.py,sha256=8l8K94Hfh2Q1421kUNECDFzeE7CNtvX9MKsdLVUdIng,5005
48
+ lemonade/tools/server/instructions.py,sha256=Lvm-tRZaYgHkyt3zQkmMChkXO6rUiLoIAunudmMr_D8,13388
49
+ lemonade/tools/server/llamacpp.py,sha256=PeHg1DbMGcf68txFgC1CJJN5HRHEnIJ4_4EDhvqAFUI,9255
50
+ lemonade/tools/server/pydantic_models.py,sha256=z1RAs9hkAFkOfMiTPtmUiC3CD2P6OMI2N0J2ztNs0d4,2179
51
+ lemonade/tools/server/serve.py,sha256=7meKOKVHaODHBYD_3dDJyaiwoC_m4z_FWniZfsZ9cCI,50655
52
+ lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
53
+ lemonade/tools/server/static/styles.css,sha256=8U1EejQaqRLQ6QTCF5UG_dLPtLjRwT1menUHMDhaq2M,5045
50
54
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
51
55
  lemonade_install/install.py,sha256=61qUO7kWCLcdjK0_IQZ46-rKP_AWkyznh4YpDclPKyM,28036
52
- lemonade_sdk-7.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
- lemonade_sdk-7.0.0.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
54
- lemonade_server/cli.py,sha256=PPW6Rvmqp4Nzadsn49mrW532XlqPYQQGApnubgF-xVg,7270
55
- lemonade_server/model_manager.py,sha256=Z0FmP7DVru1Rv0MNPDfRHLB8rORyGBzB1-ty9uLvGYA,3477
56
- lemonade_server/server_models.json,sha256=ys9oOlwBo0TmWjcNU1JsvGOAcxVCbt9U0tLzP7vGIN8,4997
57
- lemonade_sdk-7.0.0.dist-info/METADATA,sha256=9DvnEjMCXsNHrPw5EenEIgy4jlj8sDTiWB3HsdfN_wA,5608
58
- lemonade_sdk-7.0.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
59
- lemonade_sdk-7.0.0.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
60
- lemonade_sdk-7.0.0.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
61
- lemonade_sdk-7.0.0.dist-info/RECORD,,
56
+ lemonade_sdk-7.0.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
57
+ lemonade_sdk-7.0.1.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
58
+ lemonade_server/cli.py,sha256=DR6sIt66K1sZZG3ascEw_6HUgz3UhU9KGUyzxf4nO_A,7351
59
+ lemonade_server/model_manager.py,sha256=WDGDxrKjq-u2GkGWLNUsRk0d74J-RG2yCYEnH8WMnDw,4010
60
+ lemonade_server/server_models.json,sha256=ZSg1R555bLVW4U7BPaYX5ZgwaJVNAP3z1C62dzMRqAM,6198
61
+ lemonade_sdk-7.0.1.dist-info/METADATA,sha256=bvg9-Tzg_v8sTKjkAJtLahpDq_GmLDMDKA9PTisaNGw,5443
62
+ lemonade_sdk-7.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
63
+ lemonade_sdk-7.0.1.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
64
+ lemonade_sdk-7.0.1.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
65
+ lemonade_sdk-7.0.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
lemonade_server/cli.py CHANGED
@@ -92,7 +92,7 @@ def pull(model_names: List[str]):
92
92
  if server_running:
93
93
  import requests
94
94
 
95
- base_url = f"http://localhost:{port}/api/v0"
95
+ base_url = f"http://localhost:{port}/api/v1"
96
96
 
97
97
  for model_name in model_names:
98
98
  # Install the model
@@ -143,7 +143,7 @@ def status(verbose: bool = True) -> Tuple[bool, int]:
143
143
 
144
144
  def is_lemonade_server(pid):
145
145
  """
146
- Check wether or not a given PID corresponds to a Lemonade server
146
+ Check whether or not a given PID corresponds to a Lemonade server
147
147
  """
148
148
  try:
149
149
  process = psutil.Process(pid)
@@ -158,6 +158,8 @@ def is_lemonade_server(pid):
158
158
  "lemonade",
159
159
  ]:
160
160
  return True
161
+ elif "llama-server" in process.name():
162
+ return False
161
163
  if not process.parent():
162
164
  return False
163
165
  process = process.parent()
@@ -50,7 +50,7 @@ class ModelManager:
50
50
  downloaded_models = {}
51
51
  for model in self.supported_models:
52
52
  if (
53
- self.supported_models[model]["checkpoint"]
53
+ self.supported_models[model]["checkpoint"].split(":")[0]
54
54
  in self.downloaded_hf_checkpoints
55
55
  ):
56
56
  downloaded_models[model] = self.supported_models[model]
@@ -62,22 +62,17 @@ class ModelManager:
62
62
  Returns a dictionary of locally available models that are enabled by
63
63
  the current installation.
64
64
  """
65
- hybrid_installed = (
66
- "onnxruntime-vitisai" in pkg_resources.working_set.by_key
67
- and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
68
- )
65
+ return self.filter_models_by_backend(self.downloaded_models)
69
66
 
70
- downloaded_models_enabled = {}
71
- for model, value in self.downloaded_models.items():
72
- if value["recipe"] == "oga-hybrid" and hybrid_installed:
73
- downloaded_models_enabled[model] = value
74
- else:
75
- # All other models are CPU models right now
76
- # This logic will get more sophisticated when we
77
- # start to support more backends
78
- downloaded_models_enabled[model] = value
79
-
80
- return downloaded_models_enabled
67
+ def download_gguf(self, checkpoint) -> str:
68
+ # The colon after the checkpoint name indicates which
69
+ # specific GGUF to download
70
+ repo_id = checkpoint.split(":")[0]
71
+ pattern_to_match = f'*{checkpoint.split(":")[1]}.gguf'
72
+ return huggingface_hub.snapshot_download(
73
+ repo_id=repo_id,
74
+ allow_patterns=[pattern_to_match],
75
+ )
81
76
 
82
77
  def download_models(self, models: list[str]):
83
78
  """
@@ -91,7 +86,29 @@ class ModelManager:
91
86
  )
92
87
  checkpoint = self.supported_models[model]["checkpoint"]
93
88
  print(f"Downloading {model} ({checkpoint})")
94
- huggingface_hub.snapshot_download(repo_id=checkpoint)
89
+
90
+ if "gguf" in checkpoint.lower():
91
+ self.download_gguf(checkpoint)
92
+ else:
93
+ huggingface_hub.snapshot_download(repo_id=checkpoint)
94
+
95
+ def filter_models_by_backend(self, models: dict) -> dict:
96
+ """
97
+ Returns a filtered dict of models that are enabled by the
98
+ current environment.
99
+ """
100
+ hybrid_installed = (
101
+ "onnxruntime-vitisai" in pkg_resources.working_set.by_key
102
+ and "onnxruntime-genai-directml-ryzenai" in pkg_resources.working_set.by_key
103
+ )
104
+ filtered = {}
105
+ for model, value in models.items():
106
+ if value.get("recipe") == "oga-hybrid":
107
+ if hybrid_installed:
108
+ filtered[model] = value
109
+ else:
110
+ filtered[model] = value
111
+ return filtered
95
112
 
96
113
 
97
114
  # This file was originally licensed under Apache 2.0. It has been modified.
@@ -138,5 +138,47 @@
138
138
  "recipe": "oga-igpu",
139
139
  "reasoning": false,
140
140
  "suggested": false
141
+ },
142
+ "Qwen3-0.6B-GGUF": {
143
+ "checkpoint": "unsloth/Qwen3-0.6B-GGUF:Q4_0",
144
+ "recipe": "llamacpp",
145
+ "reasoning": true,
146
+ "suggested": true
147
+ },
148
+ "Qwen3-1.7B-GGUF": {
149
+ "checkpoint": "unsloth/Qwen3-1.7B-GGUF:Q4_0",
150
+ "recipe": "llamacpp",
151
+ "reasoning": true,
152
+ "suggested": true
153
+ },
154
+ "Qwen3-4B-GGUF": {
155
+ "checkpoint": "unsloth/Qwen3-4B-GGUF:Q4_0",
156
+ "recipe": "llamacpp",
157
+ "reasoning": true,
158
+ "suggested": true
159
+ },
160
+ "Qwen3-8B-GGUF": {
161
+ "checkpoint": "unsloth/Qwen3-8B-GGUF:Q4_0",
162
+ "recipe": "llamacpp",
163
+ "reasoning": true,
164
+ "suggested": true
165
+ },
166
+ "DeepSeek-Qwen3-8B-GGUF": {
167
+ "checkpoint": "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF:Q4_1",
168
+ "recipe": "llamacpp",
169
+ "reasoning": true,
170
+ "suggested": true
171
+ },
172
+ "Qwen3-14B-GGUF": {
173
+ "checkpoint": "unsloth/Qwen3-14B-GGUF:Q4_0",
174
+ "recipe": "llamacpp",
175
+ "reasoning": true,
176
+ "suggested": true
177
+ },
178
+ "Qwen3-30B-A3B-GGUF": {
179
+ "checkpoint": "unsloth/Qwen3-30B-A3B-GGUF:Q4_0",
180
+ "recipe": "llamacpp",
181
+ "reasoning": true,
182
+ "suggested": true
141
183
  }
142
184
  }