lemonade-sdk 8.0.0__py3-none-any.whl → 8.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/tools/report/table.py +9 -0
- lemonade/tools/server/llamacpp.py +34 -16
- lemonade/tools/server/serve.py +15 -0
- lemonade/tools/server/tray.py +19 -6
- lemonade/version.py +1 -1
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/METADATA +6 -2
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/RECORD +14 -14
- lemonade_server/cli.py +14 -12
- lemonade_server/pydantic_models.py +1 -0
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/top_level.txt +0 -0
lemonade/tools/report/table.py
CHANGED
|
@@ -7,6 +7,7 @@ from tabulate import tabulate
|
|
|
7
7
|
import lemonade.common.build as build
|
|
8
8
|
import lemonade.common.filesystem as fs
|
|
9
9
|
from lemonade.cache import Keys
|
|
10
|
+
from lemonade.tools.accuracy import LMEvalHarness
|
|
10
11
|
from lemonade.tools.huggingface.bench import HuggingfaceBench
|
|
11
12
|
from lemonade.tools.llamacpp.bench import LlamaCppBench
|
|
12
13
|
from lemonade.tools.mmlu import AccuracyMMLU
|
|
@@ -527,6 +528,14 @@ class LemonadePerfTable(Table):
|
|
|
527
528
|
".2f",
|
|
528
529
|
)
|
|
529
530
|
],
|
|
531
|
+
LMEvalHarness: [
|
|
532
|
+
AdditionalStat(
|
|
533
|
+
"EleutherAI\nLM Evaluation",
|
|
534
|
+
"^lm_eval_",
|
|
535
|
+
"^lm_eval_",
|
|
536
|
+
".1f",
|
|
537
|
+
)
|
|
538
|
+
],
|
|
530
539
|
},
|
|
531
540
|
"last_columns": [
|
|
532
541
|
SimpleStat(
|
|
lemonade/tools/server/llamacpp.py
CHANGED
|
@@ -416,25 +416,43 @@ def chat_completion(
|
|
|
416
416
|
exclude_unset=True, exclude_none=True
|
|
417
417
|
)
|
|
418
418
|
|
|
419
|
-
|
|
419
|
+
# Check if streaming is requested
|
|
420
|
+
if chat_completion_request.stream:
|
|
421
|
+
|
|
422
|
+
def event_stream():
|
|
423
|
+
try:
|
|
424
|
+
# Enable streaming
|
|
425
|
+
for chunk in client.chat.completions.create(**request_dict):
|
|
426
|
+
yield f"data: {chunk.model_dump_json()}\n\n"
|
|
427
|
+
yield "data: [DONE]\n\n"
|
|
428
|
+
|
|
429
|
+
# Show telemetry after completion
|
|
430
|
+
telemetry.show_telemetry()
|
|
431
|
+
|
|
432
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
433
|
+
yield f'data: {{"error": "{str(e)}"}}\n\n'
|
|
434
|
+
|
|
435
|
+
return StreamingResponse(
|
|
436
|
+
event_stream(),
|
|
437
|
+
media_type="text/event-stream",
|
|
438
|
+
headers={
|
|
439
|
+
"Cache-Control": "no-cache",
|
|
440
|
+
"Connection": "keep-alive",
|
|
441
|
+
},
|
|
442
|
+
)
|
|
443
|
+
else:
|
|
444
|
+
# Non-streaming response
|
|
420
445
|
try:
|
|
421
|
-
#
|
|
422
|
-
|
|
423
|
-
for chunk in client.chat.completions.create(**request_dict):
|
|
424
|
-
yield f"data: {chunk.model_dump_json()}\n\n"
|
|
425
|
-
yield "data: [DONE]\n\n"
|
|
446
|
+
# Disable streaming for non-streaming requests
|
|
447
|
+
response = client.chat.completions.create(**request_dict)
|
|
426
448
|
|
|
427
449
|
# Show telemetry after completion
|
|
428
450
|
telemetry.show_telemetry()
|
|
429
451
|
|
|
452
|
+
return response
|
|
453
|
+
|
|
430
454
|
except Exception as e: # pylint: disable=broad-exception-caught
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
media_type="text/event-stream",
|
|
436
|
-
headers={
|
|
437
|
-
"Cache-Control": "no-cache",
|
|
438
|
-
"Connection": "keep-alive",
|
|
439
|
-
},
|
|
440
|
-
)
|
|
455
|
+
raise HTTPException(
|
|
456
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
457
|
+
detail=f"Chat completion error: {str(e)}",
|
|
458
|
+
)
|
lemonade/tools/server/serve.py
CHANGED
|
@@ -29,6 +29,7 @@ from openai.types.chat.chat_completion_message_tool_call import (
|
|
|
29
29
|
ChatCompletionMessageToolCall,
|
|
30
30
|
Function,
|
|
31
31
|
)
|
|
32
|
+
from openai.types.completion_usage import CompletionUsage
|
|
32
33
|
from openai.types.chat.chat_completion import Choice
|
|
33
34
|
from openai.types.chat.chat_completion_chunk import (
|
|
34
35
|
ChoiceDelta,
|
|
@@ -576,9 +577,16 @@ class Server(ManagementTool):
|
|
|
576
577
|
logprobs=logprobs,
|
|
577
578
|
)
|
|
578
579
|
|
|
580
|
+
usage = CompletionUsage(
|
|
581
|
+
prompt_tokens=self.input_tokens,
|
|
582
|
+
completion_tokens=self.output_tokens,
|
|
583
|
+
total_tokens=self.input_tokens + self.output_tokens,
|
|
584
|
+
)
|
|
585
|
+
|
|
579
586
|
return Completion(
|
|
580
587
|
id="0",
|
|
581
588
|
choices=[choice],
|
|
589
|
+
usage=usage,
|
|
582
590
|
model=self.llm_loaded.checkpoint,
|
|
583
591
|
object="text_completion",
|
|
584
592
|
created=int(time.time()),
|
|
@@ -773,9 +781,16 @@ class Server(ManagementTool):
|
|
|
773
781
|
logprobs=None,
|
|
774
782
|
)
|
|
775
783
|
|
|
784
|
+
usage = CompletionUsage(
|
|
785
|
+
prompt_tokens=self.input_tokens,
|
|
786
|
+
completion_tokens=self.output_tokens,
|
|
787
|
+
total_tokens=self.input_tokens + self.output_tokens,
|
|
788
|
+
)
|
|
789
|
+
|
|
776
790
|
return ChatCompletion(
|
|
777
791
|
id="0",
|
|
778
792
|
choices=[choice],
|
|
793
|
+
usage=usage,
|
|
779
794
|
model=self.llm_loaded.checkpoint,
|
|
780
795
|
object="chat.completion",
|
|
781
796
|
created=int(time.time()),
|
lemonade/tools/server/tray.py
CHANGED
|
@@ -266,7 +266,7 @@ class LemonadeTray(SystemTray):
|
|
|
266
266
|
self.logger.error(f"Error changing port: {str(e)}")
|
|
267
267
|
self.show_balloon_notification("Error", f"Failed to change port: {str(e)}")
|
|
268
268
|
|
|
269
|
-
def upgrade_to_latest(self, icon, item):
|
|
269
|
+
def upgrade_to_latest(self, _, __):
|
|
270
270
|
"""
|
|
271
271
|
Download and launch the Lemonade Server installer
|
|
272
272
|
"""
|
|
@@ -281,21 +281,34 @@ class LemonadeTray(SystemTray):
|
|
|
281
281
|
installer_path = os.path.join(
|
|
282
282
|
tempfile.gettempdir(), "Lemonade_Server_Installer.exe"
|
|
283
283
|
)
|
|
284
|
+
if os.path.exists(installer_path):
|
|
285
|
+
os.remove(installer_path)
|
|
284
286
|
|
|
285
287
|
# Download the installer
|
|
286
288
|
response = requests.get(self.latest_version_url, stream=True)
|
|
287
289
|
response.raise_for_status()
|
|
288
290
|
|
|
289
|
-
# Save the installer to disk
|
|
291
|
+
# Save the installer to disk and force write to disk
|
|
290
292
|
with open(installer_path, "wb") as f:
|
|
291
293
|
for chunk in response.iter_content(chunk_size=8192):
|
|
292
294
|
f.write(chunk)
|
|
295
|
+
f.flush()
|
|
296
|
+
os.fsync(f.fileno())
|
|
293
297
|
|
|
294
|
-
# Launch the installer
|
|
295
|
-
subprocess.
|
|
298
|
+
# Launch the installer as a completely detached process
|
|
299
|
+
# subprocess.DETACHED_PROCESS - Creates a process that's not attached to the console
|
|
300
|
+
# subprocess.CREATE_NEW_PROCESS_GROUP - Creates a new process group
|
|
301
|
+
# close_fds=True - Closes file descriptors to prevent inheritance
|
|
302
|
+
subprocess.Popen(
|
|
303
|
+
[installer_path],
|
|
304
|
+
creationflags=subprocess.DETACHED_PROCESS
|
|
305
|
+
| subprocess.CREATE_NEW_PROCESS_GROUP,
|
|
306
|
+
close_fds=True,
|
|
307
|
+
shell=True,
|
|
308
|
+
cwd=tempfile.gettempdir(),
|
|
309
|
+
)
|
|
296
310
|
|
|
297
|
-
#
|
|
298
|
-
self.exit_app(icon, item)
|
|
311
|
+
# No need to quit the application, the installer will handle it
|
|
299
312
|
|
|
300
313
|
def create_menu(self):
|
|
301
314
|
"""
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.0"
|
|
1
|
+
__version__ = "8.0.2"
|
|
{lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.0.0
|
|
3
|
+
Version: 8.0.2
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.12
|
|
@@ -133,7 +133,7 @@ Maximum LLM performance requires the right hardware accelerator with the right i
|
|
|
133
133
|
<tr>
|
|
134
134
|
<td>🎮 GPU</td>
|
|
135
135
|
<td align="center">—</td>
|
|
136
|
-
<td align="center">Vulkan: All platforms<br><small>Focus
|
|
136
|
+
<td align="center">Vulkan: All platforms<br><small>Focus:<br/>Ryzen™ AI 7000/8000/300<br/>Radeon™ 7000/9000</small></td>
|
|
137
137
|
<td align="center">—</td>
|
|
138
138
|
<td align="center">✅</td>
|
|
139
139
|
<td align="center">✅</td>
|
|
@@ -158,6 +158,10 @@ Maximum LLM performance requires the right hardware accelerator with the right i
|
|
|
158
158
|
| **llamacpp** | Community-driven engine with strong GPU acceleration, support for thousands of `.gguf` models, and advanced features such as vision-language models (VLMs) and mixture-of-experts (MoEs). |
|
|
159
159
|
| **Hugging Face (HF)** | Hugging Face's `transformers` library can run the original `.safetensors` trained weights for models on Meta's PyTorch engine, which provides a source of truth for accuracy measurement. |
|
|
160
160
|
|
|
161
|
+
## Integrate Lemonade Server with Your Application
|
|
162
|
+
|
|
163
|
+
Lemonade Server enables languages including Python, C++, Java, C#, Node.js, Go, Ruby, Rust, and PHP. For the full list and integration details, see [docs/server/README.md](./docs/server/README.md).
|
|
164
|
+
|
|
161
165
|
## Contributing
|
|
162
166
|
|
|
163
167
|
We are actively seeking collaborators from across the industry. If you would like to contribute to this project, please check out our [contribution guide](./docs/contribute.md).
|
|
{lemonade_sdk-8.0.0.dist-info → lemonade_sdk-8.0.2.dist-info}/RECORD
CHANGED
|
@@ -4,7 +4,7 @@ lemonade/cache.py,sha256=djr2qgyUUAWlQv8FehU9qlNtCwK0IZqo82hcBDyZ3-A,2850
|
|
|
4
4
|
lemonade/cli.py,sha256=XzptHh6LTl5OdGRnxiLykQ8QBl2rQmhWH5w0KPJVyY4,4359
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=hTIZ_8cc-ggqcFeOYQQKOHudFQCQNQlM4ZltuYIIjD4,22
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
@@ -42,12 +42,12 @@ lemonade/tools/quark/quark_load.py,sha256=tNy-G9yEJ5cTsxw9LmGUYmmdlEzMo_iy-KSIc2
|
|
|
42
42
|
lemonade/tools/quark/quark_quantize.py,sha256=LZrcbLf9oIw7FW2ccP_qkCP32jxmz5YnNEaoY6rsAuY,16583
|
|
43
43
|
lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
45
|
-
lemonade/tools/report/table.py,sha256=
|
|
45
|
+
lemonade/tools/report/table.py,sha256=VkTv5Vd0HOXudEthCBnFMrWK73Dm2AQP2_B83vEKBzI,25129
|
|
46
46
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
lemonade/tools/server/llamacpp.py,sha256=
|
|
48
|
-
lemonade/tools/server/serve.py,sha256=
|
|
47
|
+
lemonade/tools/server/llamacpp.py,sha256=aDVjjkU2Z2PN25Uuy-lk6ByKPR8kg5r2X-YsVSs4vi8,15624
|
|
48
|
+
lemonade/tools/server/serve.py,sha256=3_jBpi6THnnAmtKOxvPlOkIhSTTmrlZE3fr2Dpto-Q4,52794
|
|
49
49
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
50
|
-
lemonade/tools/server/tray.py,sha256=
|
|
50
|
+
lemonade/tools/server/tray.py,sha256=OI2uCncs8UgnYFLCKHHXq06RETO2RFEcn4xLzMq-q_c,16675
|
|
51
51
|
lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
|
|
52
52
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
53
53
|
lemonade/tools/server/static/styles.css,sha256=u-SzZ-vh5qEFMDSKLHJ7MsQwvwpJLB_DdJxocf06Sro,16880
|
|
@@ -57,14 +57,14 @@ lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu
|
|
|
57
57
|
lemonade/tools/server/utils/thread.py,sha256=pK9K_6DNWoQ78NArkAX3Ym2WsxLnCs9sKTk6TitlYnI,2804
|
|
58
58
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
59
59
|
lemonade_install/install.py,sha256=DJWR36QSjZtvEwRjYPNSjhYgoxLjI_6OPrCMZjL0ChY,28263
|
|
60
|
-
lemonade_sdk-8.0.
|
|
61
|
-
lemonade_sdk-8.0.
|
|
62
|
-
lemonade_server/cli.py,sha256=
|
|
60
|
+
lemonade_sdk-8.0.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
61
|
+
lemonade_sdk-8.0.2.dist-info/licenses/NOTICE.md,sha256=B8lEqi4QE41J9ljz4Riv2JgHD1v8GCZE6nNBHO3KIA0,2135
|
|
62
|
+
lemonade_server/cli.py,sha256=z6ojwFaOIz0hbUbVtZWMLP4YDpkcVOmqwmdm55dhKA4,11980
|
|
63
63
|
lemonade_server/model_manager.py,sha256=HqbahDMRv1x8jyQj4pa1rXanlPmcCykt8tlI6WfaxjE,13023
|
|
64
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
64
|
+
lemonade_server/pydantic_models.py,sha256=nsbpHqAkd6nkz5QT16u9xMZbCXqccGiy5O0fWecOM88,2338
|
|
65
65
|
lemonade_server/server_models.json,sha256=wTK_H9XDHLxqMWQJqbBsJwm50PhOR4gURyVj9Jm35PQ,6992
|
|
66
|
-
lemonade_sdk-8.0.
|
|
67
|
-
lemonade_sdk-8.0.
|
|
68
|
-
lemonade_sdk-8.0.
|
|
69
|
-
lemonade_sdk-8.0.
|
|
70
|
-
lemonade_sdk-8.0.
|
|
66
|
+
lemonade_sdk-8.0.2.dist-info/METADATA,sha256=hS5Xn5Pjq0RbdLlhedz3HQMCvkRrMWFoAI0Mao4cHwg,8225
|
|
67
|
+
lemonade_sdk-8.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
68
|
+
lemonade_sdk-8.0.2.dist-info/entry_points.txt,sha256=gJppn0ETtXXR6ceKWEIRdk42kMC7ps59EmU3NCPyPUk,144
|
|
69
|
+
lemonade_sdk-8.0.2.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
70
|
+
lemonade_sdk-8.0.2.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -4,7 +4,6 @@ import os
|
|
|
4
4
|
from typing import Tuple, Optional
|
|
5
5
|
import psutil
|
|
6
6
|
from typing import List
|
|
7
|
-
import subprocess
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
# Error codes for different CLI scenarios
|
|
@@ -88,23 +87,26 @@ def stop():
|
|
|
88
87
|
# Terminate the main process first
|
|
89
88
|
process.terminate()
|
|
90
89
|
|
|
91
|
-
# Then terminate
|
|
90
|
+
# Then terminate llama-server child process (known to be stubborn)
|
|
91
|
+
# We avoid killing other child processes, such as the installer
|
|
92
92
|
for child in children:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
93
|
+
if "llama-server" in child.name():
|
|
94
|
+
try:
|
|
95
|
+
child.terminate()
|
|
96
|
+
except psutil.NoSuchProcess:
|
|
97
|
+
pass # Child already terminated
|
|
97
98
|
|
|
98
99
|
# Wait for main process
|
|
99
100
|
process.wait(timeout=10)
|
|
100
101
|
|
|
101
|
-
# Kill
|
|
102
|
+
# Kill llama-server child process if it didn't terminate gracefully
|
|
102
103
|
for child in children:
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
child.
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
if "llama-server" in child.name():
|
|
105
|
+
try:
|
|
106
|
+
if child.is_running():
|
|
107
|
+
child.kill()
|
|
108
|
+
except psutil.NoSuchProcess:
|
|
109
|
+
pass # Child already terminated
|
|
108
110
|
except psutil.NoSuchProcess:
|
|
109
111
|
# Process already terminated
|
|
110
112
|
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|