lemonade-sdk 8.1.10__py3-none-any.whl → 8.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of lemonade-sdk might be problematic.
Files changed (30)
  1. lemonade/tools/flm/__init__.py +1 -0
  2. lemonade/tools/flm/utils.py +255 -0
  3. lemonade/tools/llamacpp/utils.py +58 -10
  4. lemonade/tools/server/flm.py +137 -0
  5. lemonade/tools/server/llamacpp.py +23 -5
  6. lemonade/tools/server/serve.py +260 -135
  7. lemonade/tools/server/static/js/chat.js +165 -82
  8. lemonade/tools/server/static/js/models.js +87 -54
  9. lemonade/tools/server/static/js/shared.js +5 -3
  10. lemonade/tools/server/static/logs.html +47 -0
  11. lemonade/tools/server/static/styles.css +159 -8
  12. lemonade/tools/server/static/webapp.html +28 -10
  13. lemonade/tools/server/tray.py +94 -38
  14. lemonade/tools/server/utils/macos_tray.py +226 -0
  15. lemonade/tools/server/utils/{system_tray.py → windows_tray.py} +13 -0
  16. lemonade/tools/server/webapp.py +4 -1
  17. lemonade/tools/server/wrapped_server.py +91 -25
  18. lemonade/version.py +1 -1
  19. lemonade_install/install.py +25 -2
  20. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/METADATA +9 -6
  21. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/RECORD +30 -25
  22. lemonade_server/cli.py +103 -14
  23. lemonade_server/model_manager.py +186 -45
  24. lemonade_server/pydantic_models.py +25 -1
  25. lemonade_server/server_models.json +162 -62
  26. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/WHEEL +0 -0
  27. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/entry_points.txt +0 -0
  28. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/LICENSE +0 -0
  29. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/licenses/NOTICE.md +0 -0
  30. {lemonade_sdk-8.1.10.dist-info → lemonade_sdk-8.1.11.dist-info}/top_level.txt +0 -0
lemonade/tools/server/serve.py +260 -135

@@ -10,11 +10,13 @@ import traceback
 from typing import Optional, Union
 import json
 from pathlib import Path
+import os
 
-from fastapi import FastAPI, HTTPException, status, Request
+from fastapi import FastAPI, HTTPException, status, Request, WebSocket
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
+from starlette.websockets import WebSocketDisconnect, WebSocketState
 import uvicorn
 from uvicorn.config import Config
 from uvicorn.server import Server as UvicornServer
@@ -48,6 +50,7 @@ from openai.types.responses import (
 import lemonade.api as lemonade_api
 from lemonade.tools.server.wrapped_server import WrappedServer
 from lemonade.tools.server.llamacpp import LlamaServer
+from lemonade.tools.server.flm import FlmServer
 from lemonade.tools.server.tool_calls import extract_tool_calls, get_tool_call_pattern
 from lemonade.tools.server.webapp import get_webapp_html
 from lemonade.tools.server.utils.port import lifespan
@@ -75,12 +78,62 @@ from lemonade_server.settings import save_setting
 # Tests should use the max_new_tokens argument to set a lower value
 DEFAULT_MAX_NEW_TOKENS = 1500
 
-# Only import tray on Windows
-if platform.system() == "Windows":
+if platform.system() in ["Windows", "Darwin"]:
     # pylint: disable=ungrouped-imports
     from lemonade.tools.server.tray import LemonadeTray, OutputDuplicator
 
 
+class WebsocketTextFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        # Only allow logs that don't include "> TEXT"
+        return "> TEXT" not in record.getMessage()
+
+
+async def log_streamer(websocket: WebSocket, path: str, interval: float = 1.0):
+    logger = logging.getLogger()
+    await websocket.accept()
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            f.seek(0, os.SEEK_END)  # start at end
+            while True:
+                # Try reading a line
+                line = f.readline()
+                if not line:
+                    await asyncio.sleep(interval)
+                    continue
+
+                # Send defensively: if disconnected, bail out
+                if websocket.application_state != WebSocketState.CONNECTED:
+                    # Server-side state says we're not connected anymore
+                    break
+
+                try:
+                    await websocket.send_text(line)
+                except WebSocketDisconnect:
+                    # Client closed — normal path out
+                    break
+                except RuntimeError as re:
+                    # Starlette will raise this if a close has already been sent
+                    logger.debug("RuntimeError during send: %s", re)
+                    break
+
+    except WebSocketDisconnect:
+        # Client closed the socket; do not try to send or close again
+        pass
+    except Exception as e:  # pylint: disable=broad-except
+        # Log server-side; do not attempt to send error over a possibly closed socket
+        logger.exception("Error in log_streamer: %s", e)
+    finally:
+        # Only close if Starlette still thinks we're connected.
+        # This prevents "Cannot call send once a close message has been sent."
+        try:
+            if websocket.application_state == WebSocketState.CONNECTED:
+                await websocket.close()
+        except Exception:  # pylint: disable=broad-except
+            # If close itself races, swallow — we're shutting down anyway.
+            pass
+
+
 class ServerModel(Model):
     """
     An extension of OpenAI's Model class that adds
@@ -204,6 +257,12 @@ class Server:
             allow_headers=["*"],  # Allows all headers
         )
 
+        # Set up debug middleware if debug logging is enabled
+        # This must be done during app initialization, not at runtime
+        self.debug_logging_enabled = log_level == "debug"
+        if self.debug_logging_enabled:
+            self.setup_middleware_timer()
+
         # Set up custom routes
         self.setup_routes(["/api/v0", "/api/v1"])
 
@@ -265,6 +324,7 @@ class Server:
             self.app.post(f"{prefix}/completions")(self.completions)
             self.app.post(f"{prefix}/responses")(self.responses)
             self.app.post(f"{prefix}/log-level")(self.set_log_level)
+            self.app.websocket(f"{prefix}/logs/ws")(self.logs_ws)
 
             # OpenAI-compatible routes
             self.app.post(f"{prefix}/chat/completions")(self.chat_completions)
@@ -394,11 +454,13 @@ class Server:
         )
         file_handler.setLevel(logging_level)
         file_handler.setFormatter(uvicorn_formatter)
+        file_handler.addFilter(WebsocketTextFilter())
 
         # Set up console handler
         console_handler = logging.StreamHandler()
         console_handler.setLevel(logging_level)
         console_handler.setFormatter(uvicorn_formatter)
+        console_handler.addFilter(WebsocketTextFilter())
 
         # Configure root logger with both handlers
         logging.basicConfig(
@@ -421,10 +483,6 @@ class Server:
             ).run()
             sys.exit(0)
 
-        if self.debug_logging_enabled:
-            # Print the elapsed time for each request
-            self.setup_middleware_timer()
-
         # Let the app know what port it's running on, so
         # that the lifespan can access it
         self.app.port = self.port
@@ -521,7 +579,9 @@ class Server:
 
         return lc
 
-    async def completions(self, completion_request: CompletionRequest):
+    async def completions(
+        self, completion_request: CompletionRequest, request: Request
+    ):
         """
         Stream completion responses using HTTP chunked transfer encoding.
         """
@@ -534,7 +594,7 @@ class Server:
         # Load the model if it's different from the currently loaded one
         await self.load_llm(lc)
 
-        if self.llm_loaded.recipe == "llamacpp":
+        if self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm":
             return self.wrapped_server.completion(completion_request)
 
         # Check if the model supports reasoning
@@ -573,29 +633,43 @@ class Server:
             # This is necessary because the variable is modified
             # in the inner function
             nonlocal reasoning_first_token
+            try:
+                async for token in self._generate_tokens(**generation_args):
+                    # Handle client disconnect: stop generation and exit
+                    if await request.is_disconnected():
+                        self.stop_event.set()
+                        break
 
-            async for token in self._generate_tokens(**generation_args):
-                choice = CompletionChoice(
-                    text=("<think>" + token if reasoning_first_token else token),
-                    index=0,
-                    finish_reason="stop",
-                    logprobs=None,
-                )
+                    choice = CompletionChoice(
+                        text=(
+                            "<think>" + token if reasoning_first_token else token
+                        ),
+                        index=0,
+                        finish_reason="stop",
+                        logprobs=None,
+                    )
 
-                completion = Completion(
-                    id="0",
-                    choices=[choice],
-                    model=self.llm_loaded.checkpoint,
-                    object="text_completion",
-                    created=int(time.time()),
-                )
+                    completion = Completion(
+                        id="0",
+                        choices=[choice],
+                        model=self.llm_loaded.checkpoint,
+                        object="text_completion",
+                        created=int(time.time()),
+                    )
 
-                # Format as SSE
-                reasoning_first_token = False
-                yield f"data: {completion.model_dump_json()}\n\n".encode("utf-8")
+                    # Format as SSE
+                    reasoning_first_token = False
+                    yield f"data: {completion.model_dump_json()}\n\n".encode(
+                        "utf-8"
+                    )
 
-            # Send the [DONE] marker
-            yield b"data: [DONE]\n\n"
+                # Send the [DONE] marker only if still connected
+                if not await request.is_disconnected():
+                    yield b"data: [DONE]\n\n"
+            except asyncio.CancelledError:
+                # Propagate cancellation to the generator loop
+                self.stop_event.set()
+                return
 
         return StreamingResponse(
             generate(),
@@ -653,7 +727,9 @@ class Server:
             created=int(time.time()),
         )
 
-    async def chat_completions(self, chat_completion_request: ChatCompletionRequest):
+    async def chat_completions(
+        self, chat_completion_request: ChatCompletionRequest, request: Request
+    ):
         """
         Stream chat completion responses using HTTP chunked transfer encoding.
         """
@@ -669,7 +745,7 @@ class Server:
         # Load the model if it's different from the currently loaded one
         await self.load_llm(lc)
 
-        if self.llm_loaded.recipe == "llamacpp":
+        if self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm":
             return self.wrapped_server.chat_completion(chat_completion_request)
 
         # Convert chat messages to text using the model's chat template
@@ -731,69 +807,80 @@ class Server:
 
             # Keep track of the full response for tool call extraction
             full_response = ""
+            try:
+                async for token in self._generate_tokens(**generation_args):
+                    # Handle client disconnect: stop generation and exit
+                    if await request.is_disconnected():
+                        self.stop_event.set()
+                        break
 
-            async for token in self._generate_tokens(**generation_args):
-                # Continuously look for tool calls embedded into the generated text
-                openai_tool_calls = None
-                if chat_completion_request.tools:
+                    # Continuously look for tool calls embedded into the generated text
+                    openai_tool_calls = None
+                    if chat_completion_request.tools:
 
-                    # Append the token to the full response
-                    full_response += token
+                        # Append the token to the full response
+                        full_response += token
 
-                    tool_calls, _ = extract_tool_calls(
-                        full_response,
-                        tool_call_pattern,
-                    )
+                        tool_calls, _ = extract_tool_calls(
+                            full_response,
+                            tool_call_pattern,
+                        )
 
-                    # If there are tool calls, reset the full response for the next tool call
-                    if tool_calls:
-                        openai_tool_calls = []
-                        full_response = ""
-                        for tool_call in tool_calls:
-                            openai_tool_calls.append(
-                                ChoiceDeltaToolCall(
-                                    index=0,
-                                    id="-",
-                                    function=ChoiceDeltaToolCallFunction(
-                                        arguments=json.dumps(tool_call["arguments"]),
-                                        name=tool_call["name"],
-                                    ),
-                                    type="function",
+                        # If there are tool calls, reset the full response for the next call
+                        if tool_calls:
+                            openai_tool_calls = []
+                            full_response = ""
+                            for tool_call in tool_calls:
+                                openai_tool_calls.append(
+                                    ChoiceDeltaToolCall(
+                                        index=0,
+                                        id="-",
+                                        function=ChoiceDeltaToolCallFunction(
+                                            arguments=json.dumps(
+                                                tool_call["arguments"]
+                                            ),
+                                            name=tool_call["name"],
+                                        ),
+                                        type="function",
+                                    )
                                 )
-                            )
 
-                # Create a ChatCompletionChunk
-                chunk = ChatCompletionChunk.model_construct(
-                    id="0",
-                    object="chat.completion.chunk",
-                    created=int(time.time()),
-                    model=self.llm_loaded.checkpoint,
-                    choices=[
-                        Choice.model_construct(
-                            index=0,
-                            delta=ChoiceDelta(
-                                content=(
-                                    "<think>" + token
-                                    if reasoning_first_token
-                                    else token
+                    # Create a ChatCompletionChunk
+                    chunk = ChatCompletionChunk.model_construct(
+                        id="0",
+                        object="chat.completion.chunk",
+                        created=int(time.time()),
+                        model=self.llm_loaded.checkpoint,
+                        choices=[
+                            Choice.model_construct(
+                                index=0,
+                                delta=ChoiceDelta(
+                                    content=(
+                                        "<think>" + token
+                                        if reasoning_first_token
+                                        else token
+                                    ),
+                                    function_call=None,
+                                    role="assistant",
+                                    tool_calls=openai_tool_calls,
+                                    refusal=None,
                                 ),
-                                function_call=None,
-                                role="assistant",
-                                tool_calls=openai_tool_calls,
-                                refusal=None,
-                            ),
-                            finish_reason=None,
-                            logprobs=None,
-                        )
-                    ],
-                )
+                                finish_reason=None,
+                                logprobs=None,
+                            )
+                        ],
+                    )
 
-                # Format as SSE
-                reasoning_first_token = False
-                yield f"data: {chunk.model_dump_json()}\n\n".encode("utf-8")
+                    # Format as SSE
+                    reasoning_first_token = False
+                    yield f"data: {chunk.model_dump_json()}\n\n".encode("utf-8")
 
-            # Send the [DONE] marker
-            yield b"data: [DONE]\n\n"
+                # Send the [DONE] marker only if still connected
+                if not await request.is_disconnected():
+                    yield b"data: [DONE]\n\n"
+            except asyncio.CancelledError:
+                self.stop_event.set()
+                return
 
         return StreamingResponse(
             generate(),
@@ -952,7 +1039,7 @@ class Server:
             formatted_messages.append(f"{role_marker}\n{content} <|end|>")
         return "\n".join(formatted_messages) + "\n<|assistant|>"
 
-    async def responses(self, responses_request: ResponsesRequest):
+    async def responses(self, responses_request: ResponsesRequest, request: Request):
         """
         Stream responses using HTTP chunked transfer encoding.
         """
@@ -965,6 +1052,12 @@ class Server:
         # Load the model if it's different from the currently loaded one
         await self.load_llm(lc)
 
+        if self.llm_loaded.recipe == "llamacpp":
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Responses API not supported for recipe: {self.llm_loaded.recipe}",
+            )
+
         # Convert chat messages to text using the model's chat template
         if isinstance(responses_request.input, str):
             text = responses_request.input
@@ -1018,56 +1111,71 @@ class Server:
 
             full_response = "<think>" if reasoning_first_token else ""
 
-            async for token in self._generate_tokens(**generation_args):
+            try:
+                async for token in self._generate_tokens(**generation_args):
+                    # Handle client disconnect: stop generation and exit
+                    if await request.is_disconnected():
+                        self.stop_event.set()
+                        break
 
-                # Create an event
-                delta_event = ResponseTextDeltaEvent(
-                    content_index=0,
-                    delta=("<think>" + token if reasoning_first_token else token),
-                    item_id="0 ",
-                    output_index=0,
-                    type="response.output_text.delta",
-                    sequence_number=0,
-                )
-                full_response += token
+                    # Create an event
+                    delta_event = ResponseTextDeltaEvent(
+                        content_index=0,
+                        delta=(
+                            "<think>" + token if reasoning_first_token else token
+                        ),
+                        item_id="0 ",
+                        output_index=0,
+                        type="response.output_text.delta",
+                        sequence_number=0,
+                    )
+                    full_response += token
 
-                # Format as SSE
-                reasoning_first_token = False
-                yield f"data: {delta_event.model_dump_json()}\n\n".encode("utf-8")
+                    # Format as SSE
+                    reasoning_first_token = False
+                    yield f"data: {delta_event.model_dump_json()}\n\n".encode(
+                        "utf-8"
+                    )
 
-            # Send the completed event
-            response_output_message = ResponseOutputMessage(
-                id="0",
-                content=[
-                    ResponseOutputText(
-                        annotations=[],
-                        text=full_response,
-                        type="output_text",
+                # Send the completed event (only if still connected)
+                if not await request.is_disconnected():
+                    response_output_message = ResponseOutputMessage(
+                        id="0",
+                        content=[
+                            ResponseOutputText(
+                                annotations=[],
+                                text=full_response,
+                                type="output_text",
+                            )
+                        ],
+                        role="assistant",
+                        status="completed",
+                        type="message",
+                    )
+                    response = Response(
+                        id="0",
+                        model=self.llm_loaded.checkpoint,
+                        created_at=int(time.time()),
+                        object="response",
+                        output=[response_output_message],
+                        parallel_tool_calls=True,
+                        tool_choice="auto",
+                        tools=[],
+                    )
+                    completed_event = ResponseCompletedEvent(
+                        response=response,
+                        type="response.completed",
+                        sequence_number=0,
+                    )
+                    yield f"data: {completed_event.model_dump_json()}\n\n".encode(
+                        "utf-8"
                     )
-                ],
-                role="assistant",
-                status="completed",
-                type="message",
-            )
-            response = Response(
-                id="0",
-                model=self.llm_loaded.checkpoint,
-                created_at=int(time.time()),
-                object="response",
-                output=[response_output_message],
-                parallel_tool_calls=True,
-                tool_choice="auto",
-                tools=[],
-            )
-            completed_event = ResponseCompletedEvent(
-                response=response,
-                type="response.completed",
-                sequence_number=0,
-            )
-            yield f"data: {completed_event.model_dump_json()}\n\n".encode("utf-8")
 
-            # Send the [DONE] marker
-            yield b"data: [DONE]\n\n"
+                # Send the [DONE] marker
+                yield b"data: [DONE]\n\n"
+            except asyncio.CancelledError:
+                self.stop_event.set()
+                return
 
         return StreamingResponse(
             generate(),
@@ -1312,8 +1420,10 @@ class Server:
         """
         Send performance statistics to the client.
        """
-        # If using llama server, get telemetry from the telemetry instance
-        if self.llm_loaded and self.llm_loaded.recipe == "llamacpp":
+        # If using wrapped server, get telemetry from the telemetry instance
+        if self.llm_loaded and (
+            self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm"
+        ):
             return self.wrapped_server.telemetry.get_telemetry_data()
 
         # For built-in server, use the existing telemetry
@@ -1414,6 +1524,7 @@ class Server:
             checkpoint=config.checkpoint,
             recipe=config.recipe,
             reasoning=config.reasoning,
+            vision=config.vision,
             mmproj=config.mmproj,
             # The pull endpoint will download an upgraded model if available, even
             # if we already have a local copy of the model
@@ -1493,8 +1604,8 @@ class Server:
         ):
             if (
                 self.llm_loaded.recipe == "llamacpp"
-                and self.wrapped_server.process.poll()
-            ):
+                or self.llm_loaded.recipe == "flm"
+            ) and self.wrapped_server.process.poll():
                 # wrapped server process has gone away for some reason, so we should
                 # proceed with loading to get it back
                 pass
@@ -1518,6 +1629,14 @@ class Server:
                 do_not_upgrade=True,
             )
 
+        elif config_to_use.recipe == "flm":
+            self.wrapped_server = FlmServer()
+            self.wrapped_server.load(
+                model_config=config_to_use,
+                ctx_size=self.ctx_size,
+                do_not_upgrade=True,
+            )
+
         else:
             self.model, self.tokenizer = lemonade_api.from_pretrained(
                 checkpoint=config_to_use.checkpoint, recipe=config_to_use.recipe
@@ -1554,7 +1673,7 @@ class Server:
         for _ in range(self.max_concurrent_generations):
             await self._generate_semaphore.acquire()
 
-        if self.llm_loaded.recipe == "llamacpp":
+        if self.llm_loaded.recipe == "llamacpp" or self.llm_loaded.recipe == "flm":
             self.wrapped_server.process.terminate()
 
         self.llm_loaded = None
@@ -1657,6 +1776,12 @@ class Server:
             logging.debug(f"Total request time: {request_time:.4f} seconds")
             return response
 
+    async def logs_ws(self, websocket: WebSocket):
+        if not self.log_file or not os.path.exists(self.log_file):
+            await websocket.close(code=4000)
+            return
+        await log_streamer(websocket, self.log_file)
+
 
 # This file was originally licensed under Apache 2.0. It has been modified.
 # Modifications Copyright (c) 2025 AMD
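
The new logs route registered above (f"{prefix}/logs/ws", i.e. /api/v0/logs/ws and /api/v1/logs/ws) can be tailed with any WebSocket client. Below is a minimal client sketch; the host and port (localhost:8000) and the third-party websockets package are assumptions for illustration only and are not part of this diff.

# Minimal log-tailing client sketch (illustrative, not lemonade-sdk code).
# Assumes the server is reachable at localhost:8000; adjust to your deployment.
import asyncio

import websockets  # third-party: pip install websockets


async def tail_logs(uri: str = "ws://localhost:8000/api/v1/logs/ws") -> None:
    # The server closes the socket with code 4000 when no log file exists yet.
    async with websockets.connect(uri) as ws:
        try:
            while True:
                # log_streamer sends one log line per text message
                print(await ws.recv(), end="")
        except websockets.ConnectionClosed:
            pass  # server shut down or closed the stream


if __name__ == "__main__":
    asyncio.run(tail_logs())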
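
The reworked completions, chat_completions, and responses generators all share the same disconnect-handling shape: poll Request.is_disconnected() between tokens, set the stop event so generation can halt early, and only emit the final data: [DONE] marker if the client is still listening. A rough standalone sketch of that pattern follows; the demo app, route, and token source are invented for illustration and are not lemonade-sdk APIs.

# Standalone sketch of the disconnect-aware SSE streaming pattern (illustrative).
import asyncio

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

app = FastAPI()
stop_event = asyncio.Event()  # stand-in for the server's stop event


async def token_source():
    # Stand-in for a token generator; yields a few tokens slowly.
    for token in ["Hello", ", ", "world", "!"]:
        if stop_event.is_set():
            break
        yield token
        await asyncio.sleep(0.2)


@app.post("/demo/completions")
async def demo_completions(request: Request):
    async def generate():
        try:
            async for token in token_source():
                # Stop generating as soon as the client goes away
                if await request.is_disconnected():
                    stop_event.set()
                    break
                yield f"data: {token}\n\n".encode("utf-8")

            # Only send the terminator if someone is still listening
            if not await request.is_disconnected():
                yield b"data: [DONE]\n\n"
        except asyncio.CancelledError:
            # The server cancels the task on disconnect; signal the producer too
            stop_event.set()
            raise

    return StreamingResponse(generate(), media_type="text/event-stream")

In the actual serve.py changes this shape is applied inside each existing generate() inner function and reuses the server's own stop_event rather than a module-level one.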