lemonade-sdk 8.1.9__py3-none-any.whl → 8.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lemonade-sdk might be problematic. Click here for more details.

Files changed (33)
  1. lemonade/common/inference_engines.py +13 -4
  2. lemonade/common/system_info.py +570 -1
  3. lemonade/tools/flm/__init__.py +1 -0
  4. lemonade/tools/flm/utils.py +255 -0
  5. lemonade/tools/llamacpp/utils.py +62 -13
  6. lemonade/tools/server/flm.py +137 -0
  7. lemonade/tools/server/llamacpp.py +23 -5
  8. lemonade/tools/server/serve.py +292 -135
  9. lemonade/tools/server/static/js/chat.js +165 -82
  10. lemonade/tools/server/static/js/models.js +87 -54
  11. lemonade/tools/server/static/js/shared.js +5 -3
  12. lemonade/tools/server/static/logs.html +47 -0
  13. lemonade/tools/server/static/styles.css +159 -8
  14. lemonade/tools/server/static/webapp.html +28 -10
  15. lemonade/tools/server/tray.py +158 -38
  16. lemonade/tools/server/utils/macos_tray.py +226 -0
  17. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  18. lemonade/tools/server/webapp.py +4 -1
  19. lemonade/tools/server/wrapped_server.py +91 -25
  20. lemonade/version.py +1 -1
  21. lemonade_install/install.py +25 -2
  22. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
  23. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +33 -28
  24. lemonade_server/cli.py +105 -14
  25. lemonade_server/model_manager.py +186 -45
  26. lemonade_server/pydantic_models.py +25 -1
  27. lemonade_server/server_models.json +162 -62
  28. lemonade_server/settings.py +39 -39
  29. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
  30. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
  31. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
  32. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
  33. {lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
lemonade/tools/server/wrapped_server.py CHANGED
@@ -58,8 +58,22 @@ class WrappedServerTelemetry(ABC):
58
58
  telemetry = [
59
59
  ["Input tokens", self.input_tokens],
60
60
  ["Output tokens", self.output_tokens],
61
- ["TTFT (s)", f"{self.time_to_first_token:.2f}"],
62
- ["TPS", f"{self.tokens_per_second:.2f}"],
61
+ [
62
+ "TTFT (s)",
63
+ (
64
+ f"{self.time_to_first_token:.2f}"
65
+ if self.time_to_first_token is not None
66
+ else "N/A"
67
+ ),
68
+ ],
69
+ [
70
+ "TPS",
71
+ (
72
+ f"{self.tokens_per_second:.2f}"
73
+ if self.tokens_per_second is not None
74
+ else "N/A"
75
+ ),
76
+ ],
63
77
  ]
64
78
 
65
79
  table = tabulate(
@@ -83,7 +97,7 @@ class WrappedServer(ABC):
83
97
  self.telemetry: WrappedServerTelemetry = telemetry
84
98
  self.log_thread_exception = None
85
99
 
86
- def choose_port(self):
100
+ def _choose_port(self):
87
101
  """
88
102
  Users probably don't care what port we start the wrapped server on, so let's
89
103
  search for an empty port
@@ -318,18 +332,44 @@ class WrappedServer(ABC):
318
332
  if chat_completion_request.stream:
319
333
 
320
334
  def event_stream():
321
- try:
322
- # Enable streaming
323
- # pylint: disable=missing-kwoa
324
- for chunk in client.chat.completions.create(**openai_client_params):
325
- yield f"data: {chunk.model_dump_json()}\n\n"
326
- yield "data: [DONE]\n\n"
327
-
328
- # Show telemetry after completion
329
- self.telemetry.show_telemetry()
335
+ # Ensure streaming is enabled in params
336
+ stream_params = dict(openai_client_params)
337
+ stream_params["stream"] = True
330
338
 
331
- except Exception as e: # pylint: disable=broad-exception-caught
332
- yield f'data: {{"error": "{str(e)}"}}\n\n'
339
+ # Use streaming context so we can explicitly close on cancellation
340
+ with client.chat.completions.with_streaming_response.create(
341
+ # pylint: disable=missing-kwoa
342
+ **stream_params,
343
+ ) as response:
344
+ try:
345
+ for line in response.iter_lines():
346
+ # Preserve SSE event boundaries: blank line separates events
347
+ if line == b"" or line == "":
348
+ yield "\n"
349
+ continue
350
+ if isinstance(line, bytes):
351
+ try:
352
+ line = line.decode("utf-8", errors="ignore")
353
+ except (UnicodeDecodeError, LookupError):
354
+ # Skip lines that fail decoding due to encoding issues
355
+ continue
356
+ # Forward SSE lines as-is
357
+ if not line.endswith("\n"):
358
+ line += "\n"
359
+ yield line
360
+
361
+ # Show telemetry after completion
362
+ self.telemetry.show_telemetry()
363
+
364
+ except GeneratorExit:
365
+ # Client disconnected/cancelled; close upstream stream and stop
366
+ try:
367
+ response.close()
368
+ except Exception: # pylint: disable=broad-exception-caught
369
+ pass
370
+ raise
371
+ except Exception as e: # pylint: disable=broad-exception-caught
372
+ yield f'data: {{"error": "{str(e)}"}}\n\n'
333
373
 
334
374
  return StreamingResponse(
335
375
  event_stream(),
@@ -387,18 +427,44 @@ class WrappedServer(ABC):
387
427
  if completion_request.stream:
388
428
 
389
429
  def event_stream():
390
- try:
391
- # Enable streaming
392
- # pylint: disable=missing-kwoa
393
- for chunk in client.completions.create(**openai_client_params):
394
- yield f"data: {chunk.model_dump_json()}\n\n"
395
- yield "data: [DONE]\n\n"
396
-
397
- # Show telemetry after completion
398
- self.telemetry.show_telemetry()
430
+ # Ensure streaming is enabled in params
431
+ stream_params = dict(openai_client_params)
432
+ stream_params["stream"] = True
399
433
 
400
- except Exception as e: # pylint: disable=broad-exception-caught
401
- yield f'data: {{"error": "{str(e)}"}}\n\n'
434
+ # Use streaming context so we can explicitly close on cancellation
435
+ with client.completions.with_streaming_response.create(
436
+ # pylint: disable=missing-kwoa
437
+ **stream_params,
438
+ ) as response:
439
+ try:
440
+ for line in response.iter_lines():
441
+ # Preserve SSE event boundaries: blank line separates events
442
+ if line == b"" or line == "":
443
+ yield "\n"
444
+ continue
445
+ if isinstance(line, bytes):
446
+ try:
447
+ line = line.decode("utf-8", errors="ignore")
448
+ except (UnicodeDecodeError, LookupError):
449
+ # Skip lines that fail decoding due to encoding issues
450
+ continue
451
+ # Forward SSE lines as-is
452
+ if not line.endswith("\n"):
453
+ line += "\n"
454
+ yield line
455
+
456
+ # Show telemetry after completion
457
+ self.telemetry.show_telemetry()
458
+
459
+ except GeneratorExit:
460
+ # Client disconnected/cancelled; close upstream stream and stop
461
+ try:
462
+ response.close()
463
+ except Exception: # pylint: disable=broad-exception-caught
464
+ pass
465
+ raise
466
+ except Exception as e: # pylint: disable=broad-exception-caught
467
+ yield f'data: {{"error": "{str(e)}"}}\n\n'
402
468
 
403
469
  return StreamingResponse(
404
470
  event_stream(),
lemonade/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "8.1.9"
1
+ __version__ = "8.1.11"
lemonade_install/install.py CHANGED
@@ -447,6 +447,12 @@ class Install:
447
447
  choices=["rocm", "vulkan"],
448
448
  )
449
449
 
450
+ parser.add_argument(
451
+ "--flm",
452
+ action="store_true",
453
+ help="Install FLM (FastFlowLM) for running local language models",
454
+ )
455
+
450
456
  parser.add_argument(
451
457
  "--override",
452
458
  action="store_true",
@@ -727,19 +733,33 @@ class Install:
727
733
 
728
734
  install_llamacpp(backend)
729
735
 
736
+ @staticmethod
737
+ def _install_flm():
738
+ """
739
+ Install FLM (FastFlowLM) for running local language models.
740
+ """
741
+
742
+ # Check if the processor is supported before proceeding
743
+ check_ryzen_ai_processor()
744
+
745
+ from lemonade.tools.flm.utils import install_flm
746
+
747
+ install_flm()
748
+
730
749
  def run(
731
750
  self,
732
751
  ryzenai: Optional[str] = None,
733
752
  build_model: Optional[str] = None,
734
753
  llamacpp: Optional[str] = None,
754
+ flm: Optional[bool] = None,
735
755
  yes: bool = False,
736
756
  token: Optional[str] = None,
737
757
  override: bool = False,
738
758
  ):
739
- if ryzenai is None and llamacpp is None:
759
+ if ryzenai is None and llamacpp is None and flm is None:
740
760
  raise ValueError(
741
761
  "You must select something to install, "
742
- "for example `--ryzenai` or `--llamacpp`"
762
+ "for example `--llamacpp`, `--flm`, or `--ryzenai`"
743
763
  )
744
764
 
745
765
  if ryzenai is not None:
@@ -748,6 +768,9 @@ class Install:
748
768
  if llamacpp is not None:
749
769
  self._install_llamacpp(llamacpp)
750
770
 
771
+ if flm:
772
+ self._install_flm()
773
+
751
774
 
752
775
  def main():
753
776
  installer = Install()
{lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lemonade-sdk
3
- Version: 8.1.9
3
+ Version: 8.1.11
4
4
  Summary: Lemonade SDK: Your LLM Aide for Validation and Deployment
5
5
  Author-email: lemonade@amd.com
6
6
  Requires-Python: >=3.10, <3.14
@@ -29,6 +29,7 @@ Requires-Dist: tabulate
29
29
  Requires-Dist: sentencepiece
30
30
  Requires-Dist: huggingface-hub[hf_xet]==0.33.0
31
31
  Requires-Dist: python-dotenv
32
+ Requires-Dist: rumps>=0.4.0; sys_platform == "darwin"
32
33
  Provides-Extra: oga-ryzenai
33
34
  Requires-Dist: onnxruntime-genai-directml-ryzenai==0.7.0.2.1; extra == "oga-ryzenai"
34
35
  Requires-Dist: protobuf>=6.30.1; extra == "oga-ryzenai"
@@ -65,6 +66,8 @@ Dynamic: summary
65
66
  <img src="https://img.shields.io/badge/Windows-11-0078D6?logo=windows&logoColor=white" alt="Windows 11" /></a>
66
67
  <a href="https://lemonade-server.ai/#linux" title="Ubuntu 24.04 & 25.04 Supported">
67
68
  <img src="https://img.shields.io/badge/Ubuntu-24.04%20%7C%2025.04-E95420?logo=ubuntu&logoColor=white" alt="Ubuntu 24.04 | 25.04" /></a>
69
+ <a href="https://lemonade-server.ai/" title="macOS 14+ with Apple Silicon">
70
+ <img src="https://img.shields.io/badge/macOS-14%2B-000000?logo=apple&logoColor=white" alt="macOS 14+" /></a>
68
71
  <a href="docs/README.md#installation" title="Check out our instructions">
69
72
  <img src="https://img.shields.io/badge/Python-3.10--3.13-blue?logo=python&logoColor=white" alt="Made with Python" /></a>
70
73
  <a href="https://github.com/lemonade-sdk/lemonade/blob/main/docs/contribute.md" title="Contribution Guide">
@@ -152,11 +155,11 @@ You can also import custom GGUF and ONNX models from Hugging Face by using our [
152
155
 
153
156
  Lemonade supports the following configurations, while also making it easy to switch between them at runtime. Find more information about it [here](./docs/README.md#software-and-hardware-overview).
154
157
 
155
- | Hardware | Engine: OGA | Engine: llamacpp | Engine: HF | Windows | Linux |
156
- |----------|-------------|------------------|------------|---------|-------|
157
- | **🧠 CPU** | All platforms | All platforms | All platforms | ✅ | ✅ |
158
- | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms* | — | ✅ | ✅ |
159
- | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | | ✅ | — |
158
+ | Hardware | Engine: OGA | Engine: llamacpp | Engine: FLM | Windows | Linux | macOS |
159
+ |----------|-------------|------------------|------------|---------|-------|-------|
160
+ | **🧠 CPU** | All platforms | All platforms | — | ✅ | ✅ | ✅ |
161
+ | **🎮 GPU** | — | Vulkan: All platforms<br>ROCm: Selected AMD platforms*<br>Metal: Apple Silicon | — | ✅ | ✅ | ✅ |
162
+ | **🤖 NPU** | AMD Ryzen™ AI 300 series | — | Ryzen™ AI 300 series | ✅ | — | — |
160
163
 
161
164
  <details>
162
165
  <summary><small><i>* See supported AMD ROCm platforms</i></small></summary>
{lemonade_sdk-8.1.9.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD CHANGED
@@ -4,17 +4,17 @@ lemonade/cache.py,sha256=5iZbk273TiTMqK_vdzPOPYTo6VsWW2gNByOISA9zi1w,3002
4
4
  lemonade/cli.py,sha256=qU5bW7RQAUKNSpvrhVyzn68NMxyi-336Ke_JU4bsv1Q,5708
5
5
  lemonade/sequence.py,sha256=KSH7BPsiyDKsOsg_ziQKEGsDwMmuO_YbgPRBxkZd0pw,13267
6
6
  lemonade/state.py,sha256=sdSezla7Cd7KYL90xY3p9kcNV4ndSyN6UvNLOr3vBMA,5261
7
- lemonade/version.py,sha256=nNxNovCuweKgLzK71oDtPCagHKqrBZW7fNRdrO5VFWA,22
7
+ lemonade/version.py,sha256=et6OH4dSRF6oHhfToZjlMBObl75tflM6mBJXEWJArps,23
8
8
  lemonade/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lemonade/common/build.py,sha256=zTb0m1-kuUx6zw5QHp2SNnVuN6jOTMQ2FCdj9iH374U,6140
10
10
  lemonade/common/cli_helpers.py,sha256=hjBfXrTtFl8gmCFlL-ksviXR0mOcdPtTWVNKoEp3PG4,4993
11
11
  lemonade/common/exceptions.py,sha256=w83sVKmL1QXoJlGjj_bRyjIBMhlMqdVQy_FEOTu2YQI,2050
12
12
  lemonade/common/filesystem.py,sha256=QV3cHhKNu-7W2rr8wZ4JQfD2rP_5T2Js7jiDQBYWHVQ,12142
13
- lemonade/common/inference_engines.py,sha256=pJxn0zOf3gEmjGAIWXNdCibfzarzc7LRbZjoQyygkcU,12591
13
+ lemonade/common/inference_engines.py,sha256=3bUGQe9wtfTiwt8kvI_ry077uyc9lid2G1fJX95kN1A,12969
14
14
  lemonade/common/network.py,sha256=qXpUjDYQEYM_gH3JwTtU-pu_yCKcaa1IeohJRPy91-A,2903
15
15
  lemonade/common/printing.py,sha256=GFFzrXIineIOMa9yu0lo5sL4j6A5BBg_T9aUCdP-juw,3229
16
16
  lemonade/common/status.py,sha256=xSOZN508cdRtrs1HVyr9zmASYg69EsZBLSs0lroLoCM,16519
17
- lemonade/common/system_info.py,sha256=pn-k3zMQCbt5cu3aHXa4cENgrubOK97gs9PYdGPsFXA,28405
17
+ lemonade/common/system_info.py,sha256=Msa0pCSj3ZN3nerjY8wdqjjJLg6GPhbWf2htSNcFIHc,49607
18
18
  lemonade/common/test_helpers.py,sha256=Gwk-pa_6xYAo2oro-2EJNfuouAfw8k_brCbcMC-E-r0,758
19
19
  lemonade/profilers/__init__.py,sha256=JKVonvJ4XZ9_6sKXPWsiMLQCNyzQOxhQw5BEHR1qOfU,31
20
20
  lemonade/profilers/agt_power.py,sha256=t_37VEg8LPapjSKSjJln-jFznZtTIf5UpzlAXcVGOrc,16771
@@ -31,12 +31,14 @@ lemonade/tools/mmlu.py,sha256=c2QaIMDzjqxCvgHlMXmy_dP1sAFkwkDxL7RO2nogI6s,11071
31
31
  lemonade/tools/perplexity.py,sha256=eiaTZ3yhqF2pfwOffVbKKJLwjSri7Im2pC-tBJr7LLU,5638
32
32
  lemonade/tools/prompt.py,sha256=PyLksp1k8jsZsU7XBRK61k1DUHhbdLa20h-AP8Noh3w,9011
33
33
  lemonade/tools/tool.py,sha256=UsxVYukfm_iM3BfeGYPZxQlTK5UfDfDOl3RIyLr8A1Y,13256
34
+ lemonade/tools/flm/__init__.py,sha256=NQ4CEzJZGS_VvxPMlfrK4Dcx48bQSoUR4iG8e7yZjas,46
35
+ lemonade/tools/flm/utils.py,sha256=hHjSiRlkw239n03CyZeRQomxtmsJptM7m5M3CNnPlqo,8126
34
36
  lemonade/tools/huggingface/bench.py,sha256=-mTfldCtquL4mspq8ykVwDc9Mut5Ecv_jHJnSb0CYGE,6734
35
37
  lemonade/tools/huggingface/load.py,sha256=KsSGOBBD-tNEIfYC8mCWV_jpnkjHMhN3juVmC1Ln4uQ,7745
36
38
  lemonade/tools/huggingface/utils.py,sha256=j1S-IgjDsznUIVwkHSqqChmFyqIx9f3WcEelzohWwvU,13955
37
39
  lemonade/tools/llamacpp/bench.py,sha256=1fkE02ecg-jRk92i5dTAXz6re14WH8bd-Z9l-m3lbDA,4844
38
40
  lemonade/tools/llamacpp/load.py,sha256=DFCvQN548Ch9H8U_rHOiYviinzw6vixb5-V7xLj7XE4,6499
39
- lemonade/tools/llamacpp/utils.py,sha256=nl11DRJCzcDV7OcAJtA0YRVxStdBvcnLZbhDhUrN1So,33041
41
+ lemonade/tools/llamacpp/utils.py,sha256=JpI9McEYbrZXQXb0Wo7EoQ8-0LLmmZuwbgGSuuYTiyQ,35221
40
42
  lemonade/tools/oga/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
43
  lemonade/tools/oga/bench.py,sha256=PJXv4UchcS2YPwijNzef8DY4DSAKYxIYY1ycHuH3T34,5005
42
44
  lemonade/tools/oga/load.py,sha256=x-A-nhoni-WyDpVCLcWRAMfs5ouac9MJzxT-rsnLPw8,34226
@@ -45,33 +47,36 @@ lemonade/tools/report/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
45
47
  lemonade/tools/report/llm_report.py,sha256=bVHhwCINA-Ok2EdSwAsLubsc83N3KWOVuwTguw7jDcE,6676
46
48
  lemonade/tools/report/table.py,sha256=Kv_Epd8a6KIrdzSC2EgIl6uTKw7E5eMq10Tg16O0WxM,27996
47
49
  lemonade/tools/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
- lemonade/tools/server/llamacpp.py,sha256=8HdTkrU2ht8L1ldXqkfYfYhXiA8TvySuaslinAMqr-c,9002
49
- lemonade/tools/server/serve.py,sha256=gwKPs-r-bfQsyCkVxzVyTp0SyNYVqqfMeeUFQ9TivwM,61638
50
+ lemonade/tools/server/flm.py,sha256=lErpSYLIB6vyVavDd5c-XOz-85m8yPPlyHhiceKWf6c,4119
51
+ lemonade/tools/server/llamacpp.py,sha256=dSab9hR_CcctmU8HeQYQ7U0XCbAQNd_QGZUt6q7cxHA,9952
52
+ lemonade/tools/server/serve.py,sha256=ONdONSN0Noh2c65Z1m9zfQuHIDgb1c42Do0ao2MG534,68771
50
53
  lemonade/tools/server/tool_calls.py,sha256=xrAlQwKG-nv2xLlf8f9CDSaUbyMn8ZtHkds9iZLG9K8,5230
51
- lemonade/tools/server/tray.py,sha256=a9z6hdqlfj91H00j6hAExRPQkzWHhE3dnqSumzEgq0U,19599
52
- lemonade/tools/server/webapp.py,sha256=8Das5yXOaSBLZmSZ_eddJajQFxBhvl5D6GI_hHlGbE0,1040
53
- lemonade/tools/server/wrapped_server.py,sha256=DlzsGUwLQzjOFRfTTxfnhvmM_9lvAki96jWIAz7Czds,16713
54
+ lemonade/tools/server/tray.py,sha256=EFnSc2Ra4owiHVz6ykoMhxi2fYqZAK1g21AynAYBiyk,24426
55
+ lemonade/tools/server/webapp.py,sha256=GGSVIzN19C2ZaadOEPBg_D7Lt0PuF339NuWwjMPfZu8,1225
56
+ lemonade/tools/server/wrapped_server.py,sha256=uh7ifrRX1Hx0IuRwZRCGPyQOukitE7kKQipCCz0bSGA,19844
54
57
  lemonade/tools/server/static/favicon.ico,sha256=hMmP9qGJNeZ0mFS86JIqPbZstXMZn0Z76_HfHQpREAU,126745
55
- lemonade/tools/server/static/styles.css,sha256=SYEK4rC-MdpkTj31gxNl9Kb3hCNd1Fpq-EGnRMTjVe8,45362
56
- lemonade/tools/server/static/webapp.html,sha256=j7A8SOwbY_GfOSkOMV3JvXhOKY1iG70JYYuA3WdoWSQ,17856
57
- lemonade/tools/server/static/js/chat.js,sha256=XpQSIn1TUra26tu2CtDyOayhXAbUEqzBK0oGtkCAu-s,39162
58
+ lemonade/tools/server/static/logs.html,sha256=BSpdRJ8XJLStpD7XijXLbeeDaVW-FrJbI29zHNBxvYM,1321
59
+ lemonade/tools/server/static/styles.css,sha256=GYJgRtlZSgz3pShDeuatu-J9TpVwGgOjOcIWwNnWYck,49100
60
+ lemonade/tools/server/static/webapp.html,sha256=QQRMMMf8fbtJfGZYqBaRVd0-bENmdPfkVo8sdc4092Q,19151
61
+ lemonade/tools/server/static/js/chat.js,sha256=jxyMyu4MfvI2YmsMbJQ8ZwDNBnLzu2nbjm-qLfgWSNI,42182
58
62
  lemonade/tools/server/static/js/model-settings.js,sha256=JXHeG7xVrRU181Hj7CZflERAi1Z6t-qwYFR4aH5nf5I,5820
59
- lemonade/tools/server/static/js/models.js,sha256=7bCJbvS8FWpj6f1ZOwM8pt6UOQueuiOGLG79qrs-C-A,35872
60
- lemonade/tools/server/static/js/shared.js,sha256=mD03xqyMH1iQwH4pOq4IpDDaAX0z7YZY71gD8gufRAg,17487
63
+ lemonade/tools/server/static/js/models.js,sha256=es3LwrU49UtoC59e-AusUpdSXQnIRNsZvw8f05keAM0,37856
64
+ lemonade/tools/server/static/js/shared.js,sha256=NVu7lXotLnVVh1x_eXqxX1bLzYyW_eMQttOZ9f0RqUQ,17591
65
+ lemonade/tools/server/utils/macos_tray.py,sha256=xwHW44ZN5hDVlJcwIpHHfqn4VRXWxXHuDACaT-ZqdO8,7095
61
66
  lemonade/tools/server/utils/port.py,sha256=J7-g-Aqygb50jNoHLhhRfBZVM-uhGlcB5-oYBAehvgw,2263
62
- lemonade/tools/server/utils/system_tray.py,sha256=b9lvNv9chJKQxvmH7qzAuUe6H9HsLu7pdHFqGlAJaL0,12654
63
67
  lemonade/tools/server/utils/thread.py,sha256=Z-PDzGcpgfN2qxTmtlROWqrUN0B2fXdPrqo_J10fR_w,2772
68
+ lemonade/tools/server/utils/windows_tray.py,sha256=2z5aTmUPlkT-QfkcfwHsyA6dv6nSNBT0gXUErarhac8,13170
64
69
  lemonade_install/__init__.py,sha256=26zohKg2jgr_5y7tObduWMYQg8zCTWMZHL8lfi2zZVQ,40
65
- lemonade_install/install.py,sha256=Dow7kt-K9WI4PH15hBwkKtOxede3dAaOmH4I1y_P5H4,27008
66
- lemonade_sdk-8.1.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
67
- lemonade_sdk-8.1.9.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
68
- lemonade_server/cli.py,sha256=IsyWGDIeiN0MnIaBryLhArHhPj1HD52l6028dNHQCKc,19248
69
- lemonade_server/model_manager.py,sha256=V8QRf1nlh3wAFtUHoSF_JeAXeR7sfaZE1uTfppcIfcw,20492
70
- lemonade_server/pydantic_models.py,sha256=49MyOlb5feLUlKsGcI75tWaflWckrItqcSVkdCY4e3A,3269
71
- lemonade_server/server_models.json,sha256=0H_G6Jw6Yuz6t0RZnFnq0SbBCsw_cQLe9j24TkyF2eI,12344
72
- lemonade_server/settings.py,sha256=6nsmPLFJD-UokQDmlx9ZBYMbpnn48So_PuBGWP7Fmfg,1299
73
- lemonade_sdk-8.1.9.dist-info/METADATA,sha256=rG009a2eVI1apZTBLwIcyQNwdYuVmX33naZwN8Rhbbs,15022
74
- lemonade_sdk-8.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
75
- lemonade_sdk-8.1.9.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
76
- lemonade_sdk-8.1.9.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
77
- lemonade_sdk-8.1.9.dist-info/RECORD,,
70
+ lemonade_install/install.py,sha256=p3pYqhUnLQ9JJMcbjlSYDYqN-amnU_535O9Oj1yPbyM,27608
71
+ lemonade_sdk-8.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
72
+ lemonade_sdk-8.1.11.dist-info/licenses/NOTICE.md,sha256=RSca9LE5e6pvdWA_LXAUCcACIHPmINKqkRX-AVRqBGo,3499
73
+ lemonade_server/cli.py,sha256=mzlIqLeGkU96KHuib8lmZn0snU7XjQ9lspOy83dVplo,23401
74
+ lemonade_server/model_manager.py,sha256=6W6_nQea6hLD82Il2o_EgQ7oNaLfICXPKNjZiY9Y1Xk,26331
75
+ lemonade_server/pydantic_models.py,sha256=5U3PZ__UqcWQh-dNXVBc-vyJc6-In2vngZXP9VmiScM,3954
76
+ lemonade_server/server_models.json,sha256=_GxymNW7gBkJcTjzncYUvPUJ8kc-I3qqWwHqPztzYcA,14644
77
+ lemonade_server/settings.py,sha256=JOlZmirUXO9rA6BCODVFwyXrrHtYoH_LiKYm49lGm_c,1260
78
+ lemonade_sdk-8.1.11.dist-info/METADATA,sha256=uLq8pbAD_uriJbpPT25i5gk-Ukq_RbWzzEckgtceFDo,15334
79
+ lemonade_sdk-8.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
80
+ lemonade_sdk-8.1.11.dist-info/entry_points.txt,sha256=7sRvpNhi1E7amnM7RZo57e8yFF9iA5uuRaIeJ1Xre6w,193
81
+ lemonade_sdk-8.1.11.dist-info/top_level.txt,sha256=10ap5GNiPhalO4V50LRoxA1FqRT9g3Xkia6BITu880k,42
82
+ lemonade_sdk-8.1.11.dist-info/RECORD,,
lemonade_server/cli.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import argparse
2
2
  import sys
3
3
  import os
4
+ import platform
4
5
  from typing import Tuple, Optional
5
6
  import psutil
6
7
  from typing import List
@@ -104,12 +105,34 @@ def serve(
104
105
  max_wait_time = 30
105
106
  wait_interval = 0.5
106
107
  waited = 0
107
- while waited < max_wait_time:
108
- time.sleep(wait_interval)
109
- _, running_port = get_server_info()
110
- if running_port is not None:
111
- break
112
- waited += wait_interval
108
+
109
+ if platform.system() == "Darwin":
110
+ # On macOS, use direct HTTP health check instead of process scanning for better
111
+ # performance
112
+ import requests
113
+
114
+ while waited < max_wait_time:
115
+ time.sleep(wait_interval)
116
+ try:
117
+ response = requests.get(
118
+ f"http://{host}:{port}/api/v1/health", timeout=1
119
+ )
120
+ if response.status_code == 200:
121
+ break
122
+ except (
123
+ requests.exceptions.ConnectionError,
124
+ requests.exceptions.Timeout,
125
+ ):
126
+ pass # Server not ready yet
127
+ waited += wait_interval
128
+ else:
129
+ # On other platforms, use the existing approach
130
+ while waited < max_wait_time:
131
+ time.sleep(wait_interval)
132
+ _, running_port = get_server_info()
133
+ if running_port is not None:
134
+ break
135
+ waited += wait_interval
113
136
 
114
137
  return port, server_thread
115
138
 
@@ -176,6 +199,7 @@ def pull(
176
199
  checkpoint: Optional[str] = None,
177
200
  recipe: Optional[str] = None,
178
201
  reasoning: bool = False,
202
+ vision: bool = False,
179
203
  mmproj: str = "",
180
204
  ):
181
205
  """
@@ -202,6 +226,7 @@ def pull(
202
226
  ("checkpoint", checkpoint),
203
227
  ("recipe", recipe),
204
228
  ("reasoning", reasoning),
229
+ ("vision", vision),
205
230
  ("mmproj", mmproj),
206
231
  ]:
207
232
  if value:
@@ -224,6 +249,7 @@ def pull(
224
249
  checkpoint=checkpoint,
225
250
  recipe=recipe,
226
251
  reasoning=reasoning,
252
+ vision=vision,
227
253
  mmproj=mmproj,
228
254
  # The pull command will download an upgraded model if available, even
229
255
  # if we already have a local copy of the model
@@ -282,6 +308,10 @@ def run(
282
308
  import time
283
309
  import os
284
310
 
311
+ # Disable tray on macOS for run command due to threading issues
312
+ if platform.system() == "Darwin":
313
+ tray = False
314
+
285
315
  # Start the server if not running
286
316
  _, running_port = get_server_info()
287
317
  server_previously_running = running_port is not None
@@ -367,6 +397,23 @@ def is_lemonade_server(pid):
367
397
  """
368
398
  Check whether or not a given PID corresponds to a Lemonade server
369
399
  """
400
+ # macOS only: Self-exclusion to prevent blocking server startup
401
+ if platform.system() == "Darwin":
402
+ current_pid = os.getpid()
403
+ if pid == current_pid:
404
+ return False
405
+
406
+ # Exclude children of current process to avoid detecting status commands
407
+ try:
408
+ current_process = psutil.Process(current_pid)
409
+ child_pids = [
410
+ child.pid for child in current_process.children(recursive=True)
411
+ ]
412
+ if pid in child_pids:
413
+ return False
414
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
415
+ pass
416
+
370
417
  try:
371
418
  process = psutil.Process(pid)
372
419
 
@@ -375,11 +422,29 @@ def is_lemonade_server(pid):
375
422
  if process_name in [ # Windows
376
423
  "lemonade-server-dev.exe",
377
424
  "lemonade-server.exe",
425
+ "lsdev.exe",
378
426
  ] or process_name in [ # Linux
379
427
  "lemonade-server-dev",
380
428
  "lemonade-server",
429
+ "lsdev",
381
430
  ]:
382
431
  return True
432
+ # macOS only: Python scripts appear as "python3.x", check command line
433
+ elif process_name.startswith("python") and platform.system() == "Darwin":
434
+ try:
435
+ cmdline = process.cmdline()
436
+ if len(cmdline) >= 2:
437
+ script_path = cmdline[1]
438
+ # Check for various lemonade server command patterns (macOS only)
439
+ lemonade_patterns = [
440
+ "lemonade-server-dev",
441
+ "lemonade-server",
442
+ "lsdev", # Short alias for lemonade-server-dev
443
+ ]
444
+ if any(pattern in script_path for pattern in lemonade_patterns):
445
+ return True
446
+ except (psutil.AccessDenied, psutil.NoSuchProcess):
447
+ pass
383
448
  elif "llama-server" in process_name:
384
449
  return False
385
450
  if not process.parent():
@@ -397,18 +462,43 @@ def get_server_info() -> Tuple[int | None, int | None]:
397
462
  2. The port that Lemonade Server is running on
398
463
  """
399
464
 
400
- # Get all network connections and filter for localhost IPv4 listening ports
465
+ # Try the global approach first (works on Windows/Linux without permissions)
401
466
  try:
402
467
  connections = psutil.net_connections(kind="tcp4")
403
-
404
468
  for conn in connections:
405
469
  if conn.status == "LISTEN" and conn.laddr and conn.pid is not None:
406
470
  if is_lemonade_server(conn.pid):
407
471
  return conn.pid, conn.laddr.port
408
-
409
- except Exception:
472
+ except (psutil.AccessDenied, PermissionError):
473
+ # Global approach needs elevated permissions on macOS, fall back to per-process approach
474
+ pass
475
+ except Exception: # pylint: disable=broad-exception-caught
410
476
  pass
411
477
 
478
+ # Per-process approach (macOS only - needs this due to permission requirements)
479
+ if platform.system() == "Darwin":
480
+ try:
481
+ for proc in psutil.process_iter(["pid", "name"]):
482
+ try:
483
+ pid = proc.info["pid"]
484
+ if is_lemonade_server(pid):
485
+ # Found a lemonade server, check its listening ports
486
+ connections = proc.net_connections(kind="inet")
487
+ for conn in connections:
488
+ if conn.status == "LISTEN" and conn.laddr:
489
+ return pid, conn.laddr.port
490
+ # If no listening connections found, this process is not actually serving
491
+ # Continue looking for other processes
492
+ except (
493
+ psutil.NoSuchProcess,
494
+ psutil.AccessDenied,
495
+ psutil.ZombieProcess,
496
+ ):
497
+ # Some processes may be inaccessible, continue to next
498
+ continue
499
+ except Exception: # pylint: disable=broad-exception-caught
500
+ pass
501
+
412
502
  return None, None
413
503
 
414
504
 
@@ -423,12 +513,13 @@ def list_models():
423
513
 
424
514
  # Get all supported models and downloaded models
425
515
  supported_models = model_manager.supported_models
516
+ filtered_models = model_manager.filter_models_by_backend(supported_models)
426
517
  downloaded_models = model_manager.downloaded_models
427
518
 
428
519
  # Filter to only show recommended models
429
520
  recommended_models = {
430
521
  model_name: model_info
431
- for model_name, model_info in supported_models.items()
522
+ for model_name, model_info in filtered_models.items()
432
523
  if model_info.get("suggested", False)
433
524
  }
434
525
 
@@ -505,7 +596,7 @@ def _add_server_arguments(parser):
505
596
  "--llamacpp",
506
597
  type=str,
507
598
  help="LlamaCpp backend to use",
508
- choices=["vulkan", "rocm"],
599
+ choices=["vulkan", "rocm", "metal"],
509
600
  default=DEFAULT_LLAMACPP_BACKEND,
510
601
  )
511
602
  parser.add_argument(
@@ -518,7 +609,7 @@ def _add_server_arguments(parser):
518
609
  default=DEFAULT_CTX_SIZE,
519
610
  )
520
611
 
521
- if os.name == "nt":
612
+ if os.name == "nt" or platform.system() == "Darwin":
522
613
  parser.add_argument(
523
614
  "--no-tray",
524
615
  action="store_true",
@@ -618,7 +709,7 @@ def main():
618
709
 
619
710
  args = parser.parse_args()
620
711
 
621
- if os.name != "nt":
712
+ if os.name != "nt" and platform.system() != "Darwin":
622
713
  args.no_tray = True
623
714
 
624
715
  if args.version: