lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lemonade-sdk might be problematic. Click here for more details.
- lemonade/common/inference_engines.py +13 -4
- lemonade/common/system_info.py +570 -1
- lemonade/tools/flm/__init__.py +1 -0
- lemonade/tools/flm/utils.py +255 -0
- lemonade/tools/llamacpp/utils.py +62 -13
- lemonade/tools/server/flm.py +137 -0
- lemonade/tools/server/llamacpp.py +23 -5
- lemonade/tools/server/serve.py +292 -135
- lemonade/tools/server/static/js/chat.js +165 -82
- lemonade/tools/server/static/js/models.js +87 -54
- lemonade/tools/server/static/js/shared.js +5 -3
- lemonade/tools/server/static/logs.html +47 -0
- lemonade/tools/server/static/styles.css +159 -8
- lemonade/tools/server/static/webapp.html +28 -10
- lemonade/tools/server/tray.py +158 -38
- lemonade/tools/server/utils/macos_tray.py +226 -0
- lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
- lemonade/tools/server/webapp.py +4 -1
- lemonade/tools/server/wrapped_server.py +91 -25
- lemonade/version.py +1 -1
- lemonade_install/install.py +25 -2
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +33 -28
- lemonade_server/cli.py +105 -14
- lemonade_server/model_manager.py +186 -45
- lemonade_server/pydantic_models.py +25 -1
- lemonade_server/server_models.json +162 -62
- lemonade_server/settings.py +39 -39
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
- {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
|
@@ -58,8 +58,22 @@ class WrappedServerTelemetry(ABC):
|
|
|
58
58
|
telemetry = [
|
|
59
59
|
["Input tokens", self.input_tokens],
|
|
60
60
|
["Output tokens", self.output_tokens],
|
|
61
|
-
[
|
|
62
|
-
|
|
61
|
+
[
|
|
62
|
+
"TTFT (s)",
|
|
63
|
+
(
|
|
64
|
+
f"{self.time_to_first_token:.2f}"
|
|
65
|
+
if self.time_to_first_token is not None
|
|
66
|
+
else "N/A"
|
|
67
|
+
),
|
|
68
|
+
],
|
|
69
|
+
[
|
|
70
|
+
"TPS",
|
|
71
|
+
(
|
|
72
|
+
f"{self.tokens_per_second:.2f}"
|
|
73
|
+
if self.tokens_per_second is not None
|
|
74
|
+
else "N/A"
|
|
75
|
+
),
|
|
76
|
+
],
|
|
63
77
|
]
|
|
64
78
|
|
|
65
79
|
table = tabulate(
|
|
@@ -83,7 +97,7 @@ class WrappedServer(ABC):
|
|
|
83
97
|
self.telemetry: WrappedServerTelemetry = telemetry
|
|
84
98
|
self.log_thread_exception = None
|
|
85
99
|
|
|
86
|
-
def
|
|
100
|
+
def _choose_port(self):
|
|
87
101
|
"""
|
|
88
102
|
Users probably don't care what port we start the wrapped server on, so let's
|
|
89
103
|
search for an empty port
|
|
@@ -318,18 +332,44 @@ class WrappedServer(ABC):
|
|
|
318
332
|
if chat_completion_request.stream:
|
|
319
333
|
|
|
320
334
|
def event_stream():
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
for chunk in client.chat.completions.create(**openai_client_params):
|
|
325
|
-
yield f"data: {chunk.model_dump_json()}\n\n"
|
|
326
|
-
yield "data: [DONE]\n\n"
|
|
327
|
-
|
|
328
|
-
# Show telemetry after completion
|
|
329
|
-
self.telemetry.show_telemetry()
|
|
335
|
+
# Ensure streaming is enabled in params
|
|
336
|
+
stream_params = dict(openai_client_params)
|
|
337
|
+
stream_params["stream"] = True
|
|
330
338
|
|
|
331
|
-
|
|
332
|
-
|
|
339
|
+
# Use streaming context so we can explicitly close on cancellation
|
|
340
|
+
with client.chat.completions.with_streaming_response.create(
|
|
341
|
+
# pylint: disable=missing-kwoa
|
|
342
|
+
**stream_params,
|
|
343
|
+
) as response:
|
|
344
|
+
try:
|
|
345
|
+
for line in response.iter_lines():
|
|
346
|
+
# Preserve SSE event boundaries: blank line separates events
|
|
347
|
+
if line == b"" or line == "":
|
|
348
|
+
yield "\n"
|
|
349
|
+
continue
|
|
350
|
+
if isinstance(line, bytes):
|
|
351
|
+
try:
|
|
352
|
+
line = line.decode("utf-8", errors="ignore")
|
|
353
|
+
except (UnicodeDecodeError, LookupError):
|
|
354
|
+
# Skip lines that fail decoding due to encoding issues
|
|
355
|
+
continue
|
|
356
|
+
# Forward SSE lines as-is
|
|
357
|
+
if not line.endswith("\n"):
|
|
358
|
+
line += "\n"
|
|
359
|
+
yield line
|
|
360
|
+
|
|
361
|
+
# Show telemetry after completion
|
|
362
|
+
self.telemetry.show_telemetry()
|
|
363
|
+
|
|
364
|
+
except GeneratorExit:
|
|
365
|
+
# Client disconnected/cancelled; close upstream stream and stop
|
|
366
|
+
try:
|
|
367
|
+
response.close()
|
|
368
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
369
|
+
pass
|
|
370
|
+
raise
|
|
371
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
372
|
+
yield f'data: {{"error": "{str(e)}"}}\n\n'
|
|
333
373
|
|
|
334
374
|
return StreamingResponse(
|
|
335
375
|
event_stream(),
|
|
@@ -387,18 +427,44 @@ class WrappedServer(ABC):
|
|
|
387
427
|
if completion_request.stream:
|
|
388
428
|
|
|
389
429
|
def event_stream():
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
for chunk in client.completions.create(**openai_client_params):
|
|
394
|
-
yield f"data: {chunk.model_dump_json()}\n\n"
|
|
395
|
-
yield "data: [DONE]\n\n"
|
|
396
|
-
|
|
397
|
-
# Show telemetry after completion
|
|
398
|
-
self.telemetry.show_telemetry()
|
|
430
|
+
# Ensure streaming is enabled in params
|
|
431
|
+
stream_params = dict(openai_client_params)
|
|
432
|
+
stream_params["stream"] = True
|
|
399
433
|
|
|
400
|
-
|
|
401
|
-
|
|
434
|
+
# Use streaming context so we can explicitly close on cancellation
|
|
435
|
+
with client.completions.with_streaming_response.create(
|
|
436
|
+
# pylint: disable=missing-kwoa
|
|
437
|
+
**stream_params,
|
|
438
|
+
) as response:
|
|
439
|
+
try:
|
|
440
|
+
for line in response.iter_lines():
|
|
441
|
+
# Preserve SSE event boundaries: blank line separates events
|
|
442
|
+
if line == b"" or line == "":
|
|
443
|
+
yield "\n"
|
|
444
|
+
continue
|
|
445
|
+
if isinstance(line, bytes):
|
|
446
|
+
try:
|
|
447
|
+
line = line.decode("utf-8", errors="ignore")
|
|
448
|
+
except (UnicodeDecodeError, LookupError):
|
|
449
|
+
# Skip lines that fail decoding due to encoding issues
|
|
450
|
+
continue
|
|
451
|
+
# Forward SSE lines as-is
|
|
452
|
+
if not line.endswith("\n"):
|
|
453
|
+
line += "\n"
|
|
454
|
+
yield line
|
|
455
|
+
|
|
456
|
+
# Show telemetry after completion
|
|
457
|
+
self.telemetry.show_telemetry()
|
|
458
|
+
|
|
459
|
+
except GeneratorExit:
|
|
460
|
+
# Client disconnected/cancelled; close upstream stream and stop
|
|
461
|
+
try:
|
|
462
|
+
response.close()
|
|
463
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
464
|
+
pass
|
|
465
|
+
raise
|
|
466
|
+
except Exception as e: # pylint: disable=broad-exception-caught
|
|
467
|
+
yield f'data: {{"error": "{str(e)}"}}\n\n'
|
|
402
468
|
|
|
403
469
|
return StreamingResponse(
|
|
404
470
|
event_stream(),
|
lemonade/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.1.
|
|
1
|
+
__version__ = "8.1.11"
|
lemonade_install/install.py
CHANGED
|
@@ -447,6 +447,12 @@ class Install:
|
|
|
447
447
|
choices=["rocm", "vulkan"],
|
|
448
448
|
)
|
|
449
449
|
|
|
450
|
+
parser.add_argument(
|
|
451
|
+
"--flm",
|
|
452
|
+
action="store_true",
|
|
453
|
+
help="Install FLM (FastFlowLM) for running local language models",
|
|
454
|
+
)
|
|
455
|
+
|
|
450
456
|
parser.add_argument(
|
|
451
457
|
"--override",
|
|
452
458
|
action="store_true",
|
|
@@ -727,19 +733,33 @@ class Install:
|
|
|
727
733
|
|
|
728
734
|
install_llamacpp(backend)
|
|
729
735
|
|
|
736
|
+
@staticmethod
|
|
737
|
+
def _install_flm():
|
|
738
|
+
"""
|
|
739
|
+
Install FLM (FastFlowLM) for running local language models.
|
|
740
|
+
"""
|
|
741
|
+
|
|
742
|
+
# Check if the processor is supported before proceeding
|
|
743
|
+
check_ryzen_ai_processor()
|
|
744
|
+
|
|
745
|
+
from lemonade.tools.flm.utils import install_flm
|
|
746
|
+
|
|
747
|
+
install_flm()
|
|
748
|
+
|
|
730
749
|
def run(
|
|
731
750
|
self,
|
|
732
751
|
ryzenai: Optional[str] = None,
|
|
733
752
|
build_model: Optional[str] = None,
|
|
734
753
|
llamacpp: Optional[str] = None,
|
|
754
|
+
flm: Optional[bool] = None,
|
|
735
755
|
yes: bool = False,
|
|
736
756
|
token: Optional[str] = None,
|
|
737
757
|
override: bool = False,
|
|
738
758
|
):
|
|
739
|
-
if ryzenai is None and llamacpp is None:
|
|
759
|
+
if ryzenai is None and llamacpp is None and flm is None:
|
|
740
760
|
raise ValueError(
|
|
741
761
|
"You must select something to install, "
|
|
742
|
-
"for example `--
|
|
762
|
+
"for example `--llamacpp`, `--flm`, or `--ryzenai`"
|
|
743
763
|
)
|
|
744
764
|
|
|
745
765
|
if ryzenai is not None:
|
|
@@ -748,6 +768,9 @@ class Install:
|
|
|
748
768
|
if llamacpp is not None:
|
|
749
769
|
self._install_llamacpp(llamacpp)
|
|
750
770
|
|
|
771
|
+
if flm:
|
|
772
|
+
self._install_flm()
|
|
773
|
+
|
|
751
774
|
|
|
752
775
|
def main():
|
|
753
776
|
installer = Install()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lemonade-sdk
|
|
3
|
-
Version: 8.1.
|
|
3
|
+
Version: 8.1.11
|
|
4
4
|
Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
|
|
5
5
|
Author-email: lemonade@amd.com
|
|
6
6
|
Requires-Python: >=3.10, <3.14
|
|
@@ -29,6 +29,7 @@ Requires-Dist: tabulate
|
|
|
29
29
|
Requires-Dist: sentencepiece
|
|
30
30
|
Requires-Dist: huggingface-hub[hf_xet]==0.33.0
|
|
31
31
|
Requires-Dist: python-dotenv
|
|
32
|
+
Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
|
|
32
33
|
Provides-Extra: oga-ryzenai
|
|
33
34
|
Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
|
|
34
35
|
Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
|
|
@@ -65,6 +66,8 @@ Dynamic: summary
|
|
|
65
66
|
<img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" /></a>
|
|
66
67
|
<a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
|
|
67
68
|
<img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" /></a>
|
|
69
|
+
<a href="https://lemonade-server.ai/" title="macOS 14+ with Apple Silicon">
|
|
70
|
+
<img src="https://img.shields.io/badge/macOS-14%2B-000000?logo=apple&logoColor=white" alt="macOS 14+" /></a>
|
|
68
71
|
<a href="docs/README.md#installation" title="Check out our instructions">
|
|
69
72
|
<img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" /></a>
|
|
70
73
|
<a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
|
|
@@ -152,11 +155,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
|
|
|
152
155
|
|
|
153
156
|
Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
|
|
154
157
|
|
|
155
|
-
| Hardware | Engine: OGA | Engine: llamacpp | Engine:
|
|
156
|
-
|
|
157
|
-
| **🧠 CPU** | All platforms | All platforms |
|
|
158
|
-
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms
|
|
159
|
-
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — |
|
|
158
|
+
| Hardware | Engine: OGA | Engine: llamacpp | Engine: FLM | Windows | Linux | macOS |
|
|
159
|
+
|----------|-------------|------------------|------------|---------|-------|-------|
|
|
160
|
+
| **🧠 CPU** | All platforms | All platforms | - | ✅ | ✅ | ✅ |
|
|
161
|
+
| **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms*<br>Metal: Apple Silicon | — | ✅ | ✅ | ✅ |
|
|
162
|
+
| **🤖 NPU** | AMD Ryzen™ AI 300 series | — | Ryzen™ AI 300 series | ✅ | — | — |
|
|
160
163
|
|
|
161
164
|
<details>
|
|
162
165
|
<summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
|
|
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
|
|
|
4
4
|
lemonade/cli.py,sha256=qU5bW7RQAUKNSpvrhVyzn68NMxyi-336Ke_JU4bsv1Q,5708
|
|
5
5
|
lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
|
|
6
6
|
lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
|
|
7
|
-
lemonade/version.py,sha256=
|
|
7
|
+
lemonade/version.py,sha256=et6OH4dSRF6oHhfToZjlMBObl75tflM6mBJXEWJArps,23
|
|
8
8
|
lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
|
|
10
10
|
lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
|
|
11
11
|
lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
|
|
12
12
|
lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
|
|
13
|
-
lemonade/common/inference_engines.py,sha256=
|
|
13
|
+
lemonade/common/inference_engines.py,sha256=3bUGQe9wtfTiwt8kvI_ry077uyc9lid2G1fJX95kN1A,12969
|
|
14
14
|
lemonade/common/network.py,sha256=qXpUjDYQEYM_gH3JwTtU-pu_yCKcaa1IeohJRPy91-A,2903
|
|
15
15
|
lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
|
|
16
16
|
lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
|
|
17
|
-
lemonade/common/system_info.py,sha256=
|
|
17
|
+
lemonade/common/system_info.py,sha256=Msa0pCSj3ZN3nerjY8wdqjjJLg6GPhbWf2htSNcFIHc,49607
|
|
18
18
|
lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
|
|
19
19
|
lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
|
|
20
20
|
lemonade/profilers/agt_power.py,sha256=t_37VEg8LPapjSKSjJln-jFznZtTIf5UpzlAXcVGOrc,16771
|
|
@@ -31,12 +31,14 @@ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
|
|
|
31
31
|
lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
|
|
32
32
|
lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
|
|
33
33
|
lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
|
|
34
|
+
lemonade/tools/flm/__init__.py,sha256=NQ4CEzJZGS_VvxPMlfrK4Dcx48bQSoUR4iG8e7yZjas,46
|
|
35
|
+
lemonade/tools/flm/utils.py,sha256=hHjSiRlkw239n03CyZeRQomxtmsJptM7m5M3CNnPlqo,8126
|
|
34
36
|
lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
|
|
35
37
|
lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
|
|
36
38
|
lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
|
|
37
39
|
lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
|
|
38
40
|
lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
|
|
39
|
-
lemonade/tools/llamacpp/utils.py,sha256=
|
|
41
|
+
lemonade/tools/llamacpp/utils.py,sha256=JpI9McEYbrZXQXb0Wo7EoQ8-0LLmmZuwbgGSuuYTiyQ,35221
|
|
40
42
|
lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
43
|
lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
|
|
42
44
|
lemonade/tools/oga/load.py,sha256=x-A-nhoni-WyDpVCLcWRAMfs5ouac9MJzxT-rsnLPw8,34226
|
|
@@ -45,33 +47,36 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
45
47
|
lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
|
|
46
48
|
lemonade/tools/report/table.py,sha256=Kv_Epd8a6KIrdzSC2EgIl6uTKw7E5eMq10Tg16O0WxM,27996
|
|
47
49
|
lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
|
-
lemonade/tools/server/
|
|
49
|
-
lemonade/tools/server/
|
|
50
|
+
lemonade/tools/server/flm.py,sha256=lErpSYLIB6vyVavDd5c-XOz-85m8yPPlyHhiceKWf6c,4119
|
|
51
|
+
lemonade/tools/server/llamacpp.py,sha256=dSab9hR_CcctmU8HeQYQ7U0XCbAQNd_QGZUt6q7cxHA,9952
|
|
52
|
+
lemonade/tools/server/serve.py,sha256=ONdONSN0Noh2c65Z1m9zfQuHIDgb1c42Do0ao2MG534,68771
|
|
50
53
|
lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
|
|
51
|
-
lemonade/tools/server/tray.py,sha256=
|
|
52
|
-
lemonade/tools/server/webapp.py,sha256=
|
|
53
|
-
lemonade/tools/server/wrapped_server.py,sha256=
|
|
54
|
+
lemonade/tools/server/tray.py,sha256=EFnSc2Ra4owiHVz6ykoMhxi2fYqZAK1g21AynAYBiyk,24426
|
|
55
|
+
lemonade/tools/server/webapp.py,sha256=GGSVIzN19C2ZaadOEPBg_D7Lt0PuF339NuWwjMPfZu8,1225
|
|
56
|
+
lemonade/tools/server/wrapped_server.py,sha256=uh7ifrRX1Hx0IuRwZRCGPyQOukitE7kKQipCCz0bSGA,19844
|
|
54
57
|
lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
|
|
55
|
-
lemonade/tools/server/static/
|
|
56
|
-
lemonade/tools/server/static/
|
|
57
|
-
lemonade/tools/server/static/
|
|
58
|
+
lemonade/tools/server/static/logs.html,sha256=BSpdRJ8XJLStpD7XijXLbeeDaVW-FrJbI29zHNBxvYM,1321
|
|
59
|
+
lemonade/tools/server/static/styles.css,sha256=GYJgRtlZSgz3pShDeuatu-J9TpVwGgOjOcIWwNnWYck,49100
|
|
60
|
+
lemonade/tools/server/static/webapp.html,sha256=QQRMMMf8fbtJfGZYqBaRVd0-bENmdPfkVo8sdc4092Q,19151
|
|
61
|
+
lemonade/tools/server/static/js/chat.js,sha256=jxyMyu4MfvI2YmsMbJQ8ZwDNBnLzu2nbjm-qLfgWSNI,42182
|
|
58
62
|
lemonade/tools/server/static/js/model-settings.js,sha256=JXHeG7xVrRU181Hj7CZflERAi1Z6t-qwYFR4aH5nf5I,5820
|
|
59
|
-
lemonade/tools/server/static/js/models.js,sha256=
|
|
60
|
-
lemonade/tools/server/static/js/shared.js,sha256=
|
|
63
|
+
lemonade/tools/server/static/js/models.js,sha256=es3LwrU49UtoC59e-AusUpdSXQnIRNsZvw8f05keAM0,37856
|
|
64
|
+
lemonade/tools/server/static/js/shared.js,sha256=NVu7lXotLnVVh1x_eXqxX1bLzYyW_eMQttOZ9f0RqUQ,17591
|
|
65
|
+
lemonade/tools/server/utils/macos_tray.py,sha256=xwHW44ZN5hDVlJcwIpHHfqn4VRXWxXHuDACaT-ZqdO8,7095
|
|
61
66
|
lemonade/tools/server/utils/port.py,sha256=J7-g-Aqygb50jNoHLhhRfBZVM-uhGlcB5-oYBAehvgw,2263
|
|
62
|
-
lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
|
|
63
67
|
lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
|
|
68
|
+
lemonade/tools/server/utils/windows_tray.py,sha256=2z5aTmUPlkT-QfkcfwHsyA6dv6nSNBT0gXUErarhac8,13170
|
|
64
69
|
lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
|
|
65
|
-
lemonade_install/install.py,sha256=
|
|
66
|
-
lemonade_sdk-8.1.
|
|
67
|
-
lemonade_sdk-8.1.
|
|
68
|
-
lemonade_server/cli.py,sha256=
|
|
69
|
-
lemonade_server/model_manager.py,sha256=
|
|
70
|
-
lemonade_server/pydantic_models.py,sha256=
|
|
71
|
-
lemonade_server/server_models.json,sha256=
|
|
72
|
-
lemonade_server/settings.py,sha256=
|
|
73
|
-
lemonade_sdk-8.1.
|
|
74
|
-
lemonade_sdk-8.1.
|
|
75
|
-
lemonade_sdk-8.1.
|
|
76
|
-
lemonade_sdk-8.1.
|
|
77
|
-
lemonade_sdk-8.1.
|
|
70
|
+
lemonade_install/install.py,sha256=p3pYqhUnLQ9JJMcbjlSYDYqN-amnU_535O9Oj1yPbyM,27608
|
|
71
|
+
lemonade_sdk-8.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
72
|
+
lemonade_sdk-8.1.11.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
|
|
73
|
+
lemonade_server/cli.py,sha256=mzlIqLeGkU96KHuib8lmZn0snU7XjQ9lspOy83dVplo,23401
|
|
74
|
+
lemonade_server/model_manager.py,sha256=6W6_nQea6hLD82Il2o_EgQ7oNaLfICXPKNjZiY9Y1Xk,26331
|
|
75
|
+
lemonade_server/pydantic_models.py,sha256=5U3PZ__UqcWQh-dNXVBc-vyJc6-In2vngZXP9VmiScM,3954
|
|
76
|
+
lemonade_server/server_models.json,sha256=_GxymNW7gBkJcTjzncYUvPUJ8kc-I3qqWwHqPztzYcA,14644
|
|
77
|
+
lemonade_server/settings.py,sha256=JOlZmirUXO9rA6BCODVFwyXrrHtYoH_LiKYm49lGm_c,1260
|
|
78
|
+
lemonade_sdk-8.1.11.dist-info/METADATA,sha256=uLq8pbAD_uriJbpPT25i5gk-Ukq_RbWzzEckgtceFDo,15334
|
|
79
|
+
lemonade_sdk-8.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
80
|
+
lemonade_sdk-8.1.11.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
|
|
81
|
+
lemonade_sdk-8.1.11.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
|
|
82
|
+
lemonade_sdk-8.1.11.dist-info/RECORD,,
|
lemonade_server/cli.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import sys
|
|
3
3
|
import os
|
|
4
|
+
import platform
|
|
4
5
|
from typing import Tuple, Optional
|
|
5
6
|
import psutil
|
|
6
7
|
from typing import List
|
|
@@ -104,12 +105,34 @@ def serve(
|
|
|
104
105
|
max_wait_time = 30
|
|
105
106
|
wait_interval = 0.5
|
|
106
107
|
waited = 0
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
108
|
+
|
|
109
|
+
if platform.system() == "Darwin":
|
|
110
|
+
# On macOS, use direct HTTP health check instead of process scanning for better
|
|
111
|
+
# performance
|
|
112
|
+
import requests
|
|
113
|
+
|
|
114
|
+
while waited < max_wait_time:
|
|
115
|
+
time.sleep(wait_interval)
|
|
116
|
+
try:
|
|
117
|
+
response = requests.get(
|
|
118
|
+
f"http://{host}:{port}/api/v1/health", timeout=1
|
|
119
|
+
)
|
|
120
|
+
if response.status_code == 200:
|
|
121
|
+
break
|
|
122
|
+
except (
|
|
123
|
+
requests.exceptions.ConnectionError,
|
|
124
|
+
requests.exceptions.Timeout,
|
|
125
|
+
):
|
|
126
|
+
pass # Server not ready yet
|
|
127
|
+
waited += wait_interval
|
|
128
|
+
else:
|
|
129
|
+
# On other platforms, use the existing approach
|
|
130
|
+
while waited < max_wait_time:
|
|
131
|
+
time.sleep(wait_interval)
|
|
132
|
+
_, running_port = get_server_info()
|
|
133
|
+
if running_port is not None:
|
|
134
|
+
break
|
|
135
|
+
waited += wait_interval
|
|
113
136
|
|
|
114
137
|
return port, server_thread
|
|
115
138
|
|
|
@@ -176,6 +199,7 @@ def pull(
|
|
|
176
199
|
checkpoint: Optional[str] = None,
|
|
177
200
|
recipe: Optional[str] = None,
|
|
178
201
|
reasoning: bool = False,
|
|
202
|
+
vision: bool = False,
|
|
179
203
|
mmproj: str = "",
|
|
180
204
|
):
|
|
181
205
|
"""
|
|
@@ -202,6 +226,7 @@ def pull(
|
|
|
202
226
|
("checkpoint", checkpoint),
|
|
203
227
|
("recipe", recipe),
|
|
204
228
|
("reasoning", reasoning),
|
|
229
|
+
("vision", vision),
|
|
205
230
|
("mmproj", mmproj),
|
|
206
231
|
]:
|
|
207
232
|
if value:
|
|
@@ -224,6 +249,7 @@ def pull(
|
|
|
224
249
|
checkpoint=checkpoint,
|
|
225
250
|
recipe=recipe,
|
|
226
251
|
reasoning=reasoning,
|
|
252
|
+
vision=vision,
|
|
227
253
|
mmproj=mmproj,
|
|
228
254
|
# The pull command will download an upgraded model if available, even
|
|
229
255
|
# if we already have a local copy of the model
|
|
@@ -282,6 +308,10 @@ def run(
|
|
|
282
308
|
import time
|
|
283
309
|
import os
|
|
284
310
|
|
|
311
|
+
# Disable tray on macOS for run command due to threading issues
|
|
312
|
+
if platform.system() == "Darwin":
|
|
313
|
+
tray = False
|
|
314
|
+
|
|
285
315
|
# Start the server if not running
|
|
286
316
|
_, running_port = get_server_info()
|
|
287
317
|
server_previously_running = running_port is not None
|
|
@@ -367,6 +397,23 @@ def is_lemonade_server(pid):
|
|
|
367
397
|
"""
|
|
368
398
|
Check whether or not a given PID corresponds to a Lemonade server
|
|
369
399
|
"""
|
|
400
|
+
# macOS only: Self-exclusion to prevent blocking server startup
|
|
401
|
+
if platform.system() == "Darwin":
|
|
402
|
+
current_pid = os.getpid()
|
|
403
|
+
if pid == current_pid:
|
|
404
|
+
return False
|
|
405
|
+
|
|
406
|
+
# Exclude children of current process to avoid detecting status commands
|
|
407
|
+
try:
|
|
408
|
+
current_process = psutil.Process(current_pid)
|
|
409
|
+
child_pids = [
|
|
410
|
+
child.pid for child in current_process.children(recursive=True)
|
|
411
|
+
]
|
|
412
|
+
if pid in child_pids:
|
|
413
|
+
return False
|
|
414
|
+
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
415
|
+
pass
|
|
416
|
+
|
|
370
417
|
try:
|
|
371
418
|
process = psutil.Process(pid)
|
|
372
419
|
|
|
@@ -375,11 +422,29 @@ def is_lemonade_server(pid):
|
|
|
375
422
|
if process_name in [ # Windows
|
|
376
423
|
"lemonade-server-dev.exe",
|
|
377
424
|
"lemonade-server.exe",
|
|
425
|
+
"lsdev.exe",
|
|
378
426
|
] or process_name in [ # Linux
|
|
379
427
|
"lemonade-server-dev",
|
|
380
428
|
"lemonade-server",
|
|
429
|
+
"lsdev",
|
|
381
430
|
]:
|
|
382
431
|
return True
|
|
432
|
+
# macOS only: Python scripts appear as "python3.x", check command line
|
|
433
|
+
elif process_name.startswith("python") and platform.system() == "Darwin":
|
|
434
|
+
try:
|
|
435
|
+
cmdline = process.cmdline()
|
|
436
|
+
if len(cmdline) >= 2:
|
|
437
|
+
script_path = cmdline[1]
|
|
438
|
+
# Check for various lemonade server command patterns (macOS only)
|
|
439
|
+
lemonade_patterns = [
|
|
440
|
+
"lemonade-server-dev",
|
|
441
|
+
"lemonade-server",
|
|
442
|
+
"lsdev", # Short alias for lemonade-server-dev
|
|
443
|
+
]
|
|
444
|
+
if any(pattern in script_path for pattern in lemonade_patterns):
|
|
445
|
+
return True
|
|
446
|
+
except (psutil.AccessDenied, psutil.NoSuchProcess):
|
|
447
|
+
pass
|
|
383
448
|
elif "llama-server" in process_name:
|
|
384
449
|
return False
|
|
385
450
|
if not process.parent():
|
|
@@ -397,18 +462,43 @@ def get_server_info() -> Tuple[int | None, int | None]:
|
|
|
397
462
|
2. The port that Lemonade Server is running on
|
|
398
463
|
"""
|
|
399
464
|
|
|
400
|
-
#
|
|
465
|
+
# Try the global approach first (works on Windows/Linux without permissions)
|
|
401
466
|
try:
|
|
402
467
|
connections = psutil.net_connections(kind="tcp4")
|
|
403
|
-
|
|
404
468
|
for conn in connections:
|
|
405
469
|
if conn.status == "LISTEN" and conn.laddr and conn.pid is not None:
|
|
406
470
|
if is_lemonade_server(conn.pid):
|
|
407
471
|
return conn.pid, conn.laddr.port
|
|
408
|
-
|
|
409
|
-
|
|
472
|
+
except (psutil.AccessDenied, PermissionError):
|
|
473
|
+
# Global approach needs elevated permissions on macOS, fall back to per-process approach
|
|
474
|
+
pass
|
|
475
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
410
476
|
pass
|
|
411
477
|
|
|
478
|
+
# Per-process approach (macOS only - needs this due to permission requirements)
|
|
479
|
+
if platform.system() == "Darwin":
|
|
480
|
+
try:
|
|
481
|
+
for proc in psutil.process_iter(["pid", "name"]):
|
|
482
|
+
try:
|
|
483
|
+
pid = proc.info["pid"]
|
|
484
|
+
if is_lemonade_server(pid):
|
|
485
|
+
# Found a lemonade server, check its listening ports
|
|
486
|
+
connections = proc.net_connections(kind="inet")
|
|
487
|
+
for conn in connections:
|
|
488
|
+
if conn.status == "LISTEN" and conn.laddr:
|
|
489
|
+
return pid, conn.laddr.port
|
|
490
|
+
# If no listening connections found, this process is not actually serving
|
|
491
|
+
# Continue looking for other processes
|
|
492
|
+
except (
|
|
493
|
+
psutil.NoSuchProcess,
|
|
494
|
+
psutil.AccessDenied,
|
|
495
|
+
psutil.ZombieProcess,
|
|
496
|
+
):
|
|
497
|
+
# Some processes may be inaccessible, continue to next
|
|
498
|
+
continue
|
|
499
|
+
except Exception: # pylint: disable=broad-exception-caught
|
|
500
|
+
pass
|
|
501
|
+
|
|
412
502
|
return None, None
|
|
413
503
|
|
|
414
504
|
|
|
@@ -423,12 +513,13 @@ def list_models():
|
|
|
423
513
|
|
|
424
514
|
# Get all supported models and downloaded models
|
|
425
515
|
supported_models = model_manager.supported_models
|
|
516
|
+
filtered_models = model_manager.filter_models_by_backend(supported_models)
|
|
426
517
|
downloaded_models = model_manager.downloaded_models
|
|
427
518
|
|
|
428
519
|
# Filter to only show recommended models
|
|
429
520
|
recommended_models = {
|
|
430
521
|
model_name: model_info
|
|
431
|
-
for model_name, model_info in
|
|
522
|
+
for model_name, model_info in filtered_models.items()
|
|
432
523
|
if model_info.get("suggested", False)
|
|
433
524
|
}
|
|
434
525
|
|
|
@@ -505,7 +596,7 @@ def _add_server_arguments(parser):
|
|
|
505
596
|
"--llamacpp",
|
|
506
597
|
type=str,
|
|
507
598
|
help="LlamaCpp backend to use",
|
|
508
|
-
choices=["vulkan", "rocm"],
|
|
599
|
+
choices=["vulkan", "rocm", "metal"],
|
|
509
600
|
default=DEFAULT_LLAMACPP_BACKEND,
|
|
510
601
|
)
|
|
511
602
|
parser.add_argument(
|
|
@@ -518,7 +609,7 @@ def _add_server_arguments(parser):
|
|
|
518
609
|
default=DEFAULT_CTX_SIZE,
|
|
519
610
|
)
|
|
520
611
|
|
|
521
|
-
if os.name == "nt":
|
|
612
|
+
if os.name == "nt" or platform.system() == "Darwin":
|
|
522
613
|
parser.add_argument(
|
|
523
614
|
"--no-tray",
|
|
524
615
|
action="store_true",
|
|
@@ -618,7 +709,7 @@ def main():
|
|
|
618
709
|
|
|
619
710
|
args = parser.parse_args()
|
|
620
711
|
|
|
621
|
-
if os.name != "nt":
|
|
712
|
+
if os.name != "nt" and platform.system() != "Darwin":
|
|
622
713
|
args.no_tray = True
|
|
623
714
|
|
|
624
715
|
if args.version:
|