mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,1045 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import inspect
7
+ import logging
8
+ import multiprocessing
9
+ import os
10
+ import queue
11
+ import signal
12
+ import socket
13
+ import threading
14
+ import time
15
+ import traceback
16
+ from contextlib import asynccontextmanager, suppress
17
+ from dataclasses import dataclass
18
+ from multiprocessing.process import BaseProcess
19
+ from typing import Any, AsyncContextManager, AsyncIterator, Dict, Literal, Optional, cast
20
+
21
+ import aiohttp
22
+ import requests
23
+ import uvicorn
24
+ from fastapi import FastAPI
25
+ from gunicorn.app.base import BaseApplication
26
+ from gunicorn.arbiter import Arbiter
27
+ from portpicker import pick_unused_port
28
+
29
+ __all__ = ["PythonServerLauncher", "PythonServerLauncherArgs", "LaunchMode"]
30
+
31
+
32
+ LaunchMode = Literal["asyncio", "thread", "mp"]
33
+ """The launch mode for the server."""
34
+
35
+
36
@dataclass
class PythonServerLauncherArgs:
    """Configuration values for launching a Python server."""

    port: Optional[int] = None
    """TCP port to listen on. When omitted, a random available port is used."""
    host: Optional[str] = None
    """Hostname or IP address the server binds to."""
    access_host: Optional[str] = None
    """Hostname or IP address advertised to clients. When omitted, the default outbound IPv4 address of this machine is used."""
    launch_mode: LaunchMode = "asyncio"
    """How the server is launched. `asyncio` (the default) runs the server in the current thread,
    `thread` runs it in a separate thread, and `mp` runs it in a separate process."""
    n_workers: int = 1
    """Number of server workers. Only applicable for `mp` mode.
    When `n_workers > 1`, the server is run using Gunicorn.
    """
    healthcheck_url: Optional[str] = None
    """URL probed to verify the server is healthy.
    When omitted, no health check is performed after startup.
    """
    log_level: int = logging.INFO
    """Log level to use."""
    access_log: bool = False
    """Whether access logs are turned on."""
    startup_timeout: float = 60.0
    """How long to wait for the server to start up."""
    kill_unhealthy_server: bool = True
    """Whether to kill the server if it is not healthy after startup.
    This setting is ignored when `launch_mode` is not `asyncio`.
    """
    thread_join_timeout: float = 10.0
    """How long to wait for the server thread to join."""
    process_join_timeout: float = 10.0
    """How long to wait for the server process to join."""
    timeout_keep_alive: int = 30
    """How long to keep a connection alive."""
71
+
72
+
73
@dataclass
class ChildEvent:
    """Message describing something that happened in a child thread or process."""

    kind: Literal["ready", "error"]
    """Which kind of event this is."""
    exc_type: Optional[str] = None
    """Name of the exception type; only populated for error events."""
    message: Optional[str] = None
    """Exception message; only populated for error events."""
    traceback: Optional[str] = None
    """Formatted exception traceback; only populated for error events."""
85
+
86
+
87
+ logger = logging.getLogger(__name__)
88
+
89
+
90
class GunicornApp(BaseApplication):
    """Gunicorn application configured from Python instead of the command line.

    It wraps an already-built `FastAPI` app together with a dict of Gunicorn
    options. It is intended to be used with `uvicorn_worker.UvicornWorker`;
    the worker class itself comes from the supplied options.
    """

    def __init__(self, app: FastAPI, options: Dict[str, Any]):
        # Both attributes must exist before the base initializer runs,
        # because load_config() below reads self.options.
        self.application = app
        self.options = options
        super().__init__()  # type: ignore

    def load_config(self):
        """Copy every recognized, non-None option into the Gunicorn config."""
        config = self.cfg
        known_settings = config.settings.keys()  # type: ignore
        for name, value in (self.options or {}).items():
            if value is None or name not in known_settings:
                # Unknown keys and unset values are silently skipped.
                continue
            config.set(name, value)  # type: ignore

    def load(self):
        """Return the wrapped application object for Gunicorn to serve."""
        return self.application
112
+
113
+
114
async def shutdown_uvicorn_server(server: uvicorn.Server, task: asyncio.Task[None], timeout: float = 5.0) -> None:
    """Ask a uvicorn server to exit and wait for its serving task to finish.

    Args:
        server: The uvicorn server whose `should_exit` flag is raised.
        task: The task that is running the server.
        timeout: Seconds to wait for a clean exit before cancelling `task`.
    """
    logger.debug("Requesting graceful shutdown of uvicorn server.")
    server.should_exit = True

    logger.debug("Waiting for graceful shutdown of uvicorn server.")
    try:
        # Allow uvicorn a short window to wind down by itself.
        await asyncio.wait_for(task, timeout=timeout)
    except asyncio.TimeoutError:
        logger.error("Graceful shutdown of uvicorn server timed out.")
        # Last resort: cancel the task. Not expected under normal circumstances.
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        logger.warning("Uvicorn server forced to stop.")
    else:
        logger.debug("Graceful shutdown of uvicorn server completed.")
130
+
131
+
132
@asynccontextmanager
async def noop_context() -> AsyncIterator[None]:
    """Async context manager with no setup and no teardown.

    Usable wherever a real `serve_context` is required but nothing needs to happen.
    """
    yield None
136
+
137
+
138
async def run_uvicorn_asyncio(
    uvicorn_server: uvicorn.Server,
    serve_context: AsyncContextManager[Any],
    timeout: float = 60.0,
    health_url: Optional[str] = None,
    wait_for_serve: bool = True,
    kill_unhealthy_server: bool = True,
) -> asyncio.Task[None]:
    """Run two asyncio tasks in parallel:

    - A watcher task that waits for the server to start up and then checks for healthiness.
    - A server task that serves the server inside `serve_context`.

    Args:
        uvicorn_server: The uvicorn server to run.
        serve_context: Async context manager entered around `uvicorn_server.serve()`.
        timeout: Seconds allowed for startup plus the health check, measured from when the watcher starts.
        health_url: URL that must answer with HTTP 200. If `None`, the health check is skipped.
        wait_for_serve: If true, wait for both the watcher and the serve task before returning.
            If false, return as soon as the watcher finishes and leave the serve task running.
        kill_unhealthy_server: If true, a server that fails the health check is asked to exit
            and a `RuntimeError` is raised. If false, the failure is only logged.

    Returns:
        The task that runs the server.

    Raises:
        RuntimeError: If the server does not start within `timeout`, if it is unhealthy and
            `kill_unhealthy_server` is set, or (with `wait_for_serve`) if serving fails.
    """
    server_start_exception: Optional[BaseException] = None

    # Watcher: waits for uvicorn_server.started to flip True, then probes the health endpoint.
    async def _watch_server() -> None:
        start_time = time.time()
        deadline = start_time + timeout  # child-side startup window
        logger.debug(f"Waiting for server to start up for {timeout:.2f} seconds...")
        # Wait for the server to start up or the deadline to be reached, or an exception to be raised.
        while time.time() < deadline and not uvicorn_server.started and server_start_exception is None:
            await asyncio.sleep(0.1)

        if not uvicorn_server.started:
            # Normally, the program will not reach this point, as the server will throw the exception itself earlier.
            raise RuntimeError(
                f"Server did not start up within {time.time() - start_time:.2f} seconds."
            ) from server_start_exception

        logger.info(f"Server started up in {time.time() - start_time:.2f} seconds.")

        if health_url is None:
            logger.info("Server does not provide a health check endpoint. Skipping health check.")
            return

        # Check for health endpoint status.
        logger.info(f"Probing health endpoint {health_url}...")
        async with aiohttp.ClientSession() as session:
            while time.time() < deadline:
                # Bound every probe by the time left until the deadline. Without an explicit
                # timeout a hanging endpoint would block for aiohttp's (much longer) default
                # and overrun the startup window. The floor keeps the value positive.
                remaining = max(deadline - time.time(), 0.1)
                probe_timeout = aiohttp.ClientTimeout(total=remaining)
                try:
                    async with session.get(health_url, timeout=probe_timeout) as resp:
                        if resp.status == 200:
                            logger.info(
                                f"Server is healthy at {health_url} in {time.time() - start_time:.2f} seconds."
                            )
                            return
                        else:
                            logger.debug(
                                f"Server is NOT healthy at {health_url} in {time.time() - start_time:.2f} seconds. Got status {resp.status}."
                            )
                except Exception as e:
                    # Connection errors and probe timeouts both land here; retry until the deadline.
                    logger.debug(f"Error probing health endpoint {health_url}: {str(e)}")
                await asyncio.sleep(0.1)

        # If the server is not healthy, kill it if requested.
        health_failed_seconds = time.time() - start_time
        if kill_unhealthy_server:
            logger.error(
                f"Server is not healthy at {health_url} after {health_failed_seconds:.2f} seconds. Shutting down server gracefully."
            )
            uvicorn_server.should_exit = True
            # serve_task is bound in the enclosing scope before this coroutine first runs.
            await serve_task

            raise RuntimeError(
                f"Server is not healthy at {health_url} after {health_failed_seconds:.2f} seconds. It has been killed."
            )
        else:
            logger.error(
                f"Server is not healthy at {health_url} after {health_failed_seconds:.2f} seconds. It has been left running."
            )

    async def _serve_server() -> None:
        nonlocal server_start_exception
        async with serve_context:
            try:
                await uvicorn_server.serve()
            except (asyncio.CancelledError, KeyboardInterrupt):
                # Normal shutdown path; propagate without rewrapping
                raise
            except BaseException as exc:
                # Record the failure so the watcher stops waiting for startup.
                server_start_exception = exc
                if wait_for_serve:
                    # This probably sends out earlier than watcher exception; but either one is fine.
                    raise RuntimeError("Uvicorn server failed to serve") from exc
                else:
                    # If the caller is not waiting for this coroutine, we just log the error.
                    # It will be handled by the watch task.
                    logger.exception("Uvicorn server failed to serve. Inspect the logs for details.")

    serve_task = asyncio.create_task(_serve_server())
    watch_task = asyncio.create_task(_watch_server())

    if wait_for_serve:
        await asyncio.gather(watch_task, serve_task)
    else:
        # Wait for watch only, the serve task will run in the background.
        await watch_task
    return serve_task
237
+
238
+
239
def run_uvicorn_thread(
    uvicorn_server: uvicorn.Server,
    serve_context: AsyncContextManager[Any],
    event_queue: queue.Queue[ChildEvent],
    stop_event: threading.Event,
    timeout: float = 60.0,
    health_url: Optional[str] = None,
):
    """
    Run a uvicorn server in a thread.

    How to stop programmatically (from the main thread): set `stop_event`.
    Setting `uvicorn_server.should_exit = True` also works; once the serve
    task has finished, this function stops waiting and returns.

    This function:

    - starts the server and waits for startup/health (if provided),
    - posts `ChildEvent(kind="ready")` or `ChildEvent(kind="error")` to `event_queue`,
    - then blocks until the server exits or `stop_event` is set,
    - and finally requests a graceful shutdown of the server.

    Args:
        uvicorn_server: The uvicorn server to run.
        serve_context: Async context manager applied around serving.
        event_queue: Queue on which readiness and error events are posted.
        stop_event: Event that tells this thread to shut the server down.
        timeout: Seconds allowed for startup and the health check.
        health_url: Optional health-check URL probed after startup.
    """

    async def _main() -> None:
        # Start server without waiting for full lifecycle; return once startup/health is done.
        serve_task: asyncio.Task[None]
        try:
            serve_task = await run_uvicorn_asyncio(
                uvicorn_server=uvicorn_server,
                serve_context=serve_context,
                timeout=timeout,
                health_url=health_url,
                wait_for_serve=False,  # return after startup watcher finishes
                kill_unhealthy_server=True,  # raise if health fails within timeout
            )
            event_queue.put(ChildEvent(kind="ready"))
        except Exception as exc:
            # Startup/health failed. A serve task that is still pending is cancelled
            # when asyncio.run() tears down the loop after this coroutine returns.
            logger.exception("Uvicorn failed to start or was unhealthy.")
            event_queue.put(
                ChildEvent(
                    kind="error", exc_type=type(exc).__name__, message=str(exc), traceback=traceback.format_exc()
                )
            )
            return

        logger.debug("Thread server started and ready.")
        try:
            # At this point, the server is up and serving in the same thread's loop.
            # Block until the caller sets stop_event, or until the server task ends by
            # itself; polling only stop_event would keep this thread alive forever
            # after the server has already exited.
            while not stop_event.is_set() and not serve_task.done():
                await asyncio.sleep(0.1)
            if not stop_event.is_set():
                logger.warning("Uvicorn server exited before the stop event was set.")
        except asyncio.CancelledError:
            # Shutdown the server.
            logger.warning(
                "Thread server received asyncio cancellation signal. Shutting down gracefully. This is not the recommended way to stop the server."
            )
            raise
        except Exception as exc:
            logger.exception("Exception during the thread event waiting loop.")
            event_queue.put(
                ChildEvent(
                    kind="error", exc_type=type(exc).__name__, message=str(exc), traceback=traceback.format_exc()
                )
            )
        finally:
            logger.info("Requesting graceful shutdown of uvicorn server.")
            await shutdown_uvicorn_server(uvicorn_server, serve_task)
            logger.info("Uvicorn server shut down gracefully.")

    # Each thread needs its own event loop; use asyncio.run to manage it cleanly.
    try:
        asyncio.run(_main())
    except Exception:
        # Exceptions are already logged above; don't crash the process from a thread.
        # (Caller can inspect logs or add a queue/handler if they need to propagate.)
        logger.exception("Exception within the thread server loop. Inspect the logs for details.")
316
+
317
+
318
def run_uvicorn_subprocess(
    uvicorn_server: uvicorn.Server,
    serve_context: AsyncContextManager[Any],
    event_queue: multiprocessing.Queue[ChildEvent],
    timeout: float = 60.0,
    health_url: Optional[str] = None,
):
    """Entry point for serving uvicorn from a child process.

    Sequence of events:

    - Uvicorn is started and startup/health (if provided) is awaited.
    - `ChildEvent(kind="ready")` is posted once the server is up.
    - The process then idles until SIGTERM or SIGINT arrives.
    - On signal, a graceful shutdown is requested and awaited.

    Any failure is reported to the parent as `ChildEvent(kind="error")`.

    This must be used with forked multiprocessing.Process.
    """

    def _as_error_event(exc: Exception) -> ChildEvent:
        # Must be called from inside an `except` block so format_exc() sees the active exception.
        return ChildEvent(
            kind="error",
            exc_type=type(exc).__name__,
            message=str(exc),
            traceback=traceback.format_exc(),
        )

    async def _main() -> None:
        shutdown_requested = asyncio.Event()

        # Route termination signals to the shutdown flag.
        running_loop = asyncio.get_running_loop()
        for signum in (signal.SIGTERM, signal.SIGINT):
            running_loop.add_signal_handler(signum, shutdown_requested.set)
        logger.debug("Subprocess signal handlers registered.")

        serving: Optional[asyncio.Task[None]] = None

        try:
            # Returns once the startup watcher is done; the server keeps running in the background.
            serving = await run_uvicorn_asyncio(
                uvicorn_server=uvicorn_server,
                serve_context=serve_context,
                timeout=timeout,
                health_url=health_url,
                wait_for_serve=False,  # only wait for startup/health
                kill_unhealthy_server=True,  # an unhealthy server fails fast in the child
            )

            # Readiness is only announced after the watcher succeeded.
            event_queue.put(ChildEvent(kind="ready"))

            logger.debug("Subprocess server started and ready.")

            # Idle until a signal asks us to stop.
            await shutdown_requested.wait()

        except Exception as exc:
            # Let the parent know about startup/health errors.
            event_queue.put(_as_error_event(exc))
            logger.exception("Subprocess server failed to start or was unhealthy.")

        finally:
            if serving is None:
                logger.info("Subprocess server was not running. Nothing to stop.")
            else:
                # The server is (or was) running; stop it cleanly.
                logger.info("Requesting graceful shutdown of subprocess server.")
                await shutdown_uvicorn_server(uvicorn_server, serving)
                logger.info("Subprocess server shut down gracefully.")

    try:
        asyncio.run(_main())
    except Exception as exc:
        # Whatever escaped _main() still has to reach the parent.
        event_queue.put(_as_error_event(exc))
400
+
401
+
402
def run_gunicorn(
    gunicorn_app: GunicornApp,
    serve_context: AsyncContextManager[Any],
    event_queue: multiprocessing.Queue[ChildEvent],
    timeout: float = 60.0,
    health_url: Optional[str] = None,
):
    """Run a gunicorn server in a subprocess.

    The master arbiter is meant to live in a non-daemon subprocess, with the
    workers forked from the arbiter.

    What happens here:

    - `Arbiter.run()` (blocking) is executed in this process, wrapped by `serve_context`.
    - A daemon watchdog thread waits for workers to appear and then, if `health_url`
      is given, polls it until it answers with HTTP 200.
    - Success is reported as `ChildEvent(kind="ready")`.
    - Failure or timeout is reported as `ChildEvent(kind="error")`, and the arbiter is
      sent SIGTERM to request a graceful shutdown.
    """

    def _post_error(exc_type: str, message: str, tb: Optional[str]) -> None:
        # Single place where error events are built and queued for the parent.
        event_queue.put(ChildEvent(kind="error", exc_type=exc_type, message=message, traceback=tb))

    # The arbiter is built first because the watchdog needs to inspect it.
    try:
        arbiter = Arbiter(gunicorn_app)
    except Exception as exc:
        logger.exception("Failed to initialize Gunicorn Arbiter.")
        _post_error(type(exc).__name__, str(exc), traceback.format_exc())
        return

    # Set by _serve() when arbiter.run() raises; read by the watchdog thread.
    runtime_error: Optional[BaseException] = None

    def _watchdog() -> None:
        started_at = time.time()
        deadline = started_at + timeout

        # Phase 1: wait until the arbiter has spawned workers.
        while time.time() < deadline and not arbiter.WORKERS:  # type: ignore
            if runtime_error is not None:
                # The arbiter died early; no point in waiting further.
                logger.error("Gunicorn arbiter exited during startup. Watchdog exiting.")
                return
            time.sleep(0.1)

        if not arbiter.WORKERS:  # type: ignore
            elapsed_time = time.time() - started_at
            logger.error("Gunicorn workers did not start within %.2f seconds.", elapsed_time)
            if runtime_error is not None:
                # Timed out, and the arbiter has already thrown; _serve() reported it.
                logger.error("Gunicorn arbiter exited during startup. Watchdog exiting.")
                return
            # Plain timeout: the arbiter raised nothing, so report it from here.
            _post_error(
                "RuntimeError",
                f"Gunicorn workers did not start within {elapsed_time:.2f} seconds.",
                None,
            )
            logger.info("Halting Gunicorn arbiter.")
            # Ask the arbiter to stop if it is still alive.
            arbiter.signal(signal.SIGTERM, inspect.currentframe())  # type: ignore
            return

        # Phase 2: optional health check.
        if not health_url:
            # Without a health check, running workers mean the server is ready.
            if runtime_error is None:
                event_queue.put(ChildEvent(kind="ready"))
            else:
                logger.error("Gunicorn arbiter exited unexpectedly before health check. Watchdog exiting.")
            return

        while time.time() < deadline:
            if runtime_error is not None:
                # The arbiter died while we were probing.
                logger.error("Gunicorn arbiter exited during health check. Watchdog exiting.")
                return

            try:
                resp = requests.get(health_url, timeout=2.0)
                if resp.status_code == 200:
                    logger.debug(f"Server is healthy at {health_url} in {time.time() - started_at:.2f} seconds.")
                    # Re-check the arbiter before announcing readiness.
                    if runtime_error is None:
                        event_queue.put(ChildEvent(kind="ready"))
                    else:
                        logger.error(
                            "Response status is 200 but arbiter has thrown an exception. This should not happen."
                        )
                    return
            except Exception:
                logger.debug(
                    f"Server is still not healthy at {health_url} in {time.time() - started_at:.2f} seconds.",
                    exc_info=True,
                )
            time.sleep(0.1)

        # The deadline passed without a healthy response: report and shut down.
        elapsed = time.time() - started_at
        logger.error(
            "Server is not healthy at %s after %.2f seconds. Shutting down.",
            health_url,
            elapsed,
        )
        if runtime_error is not None:
            # The arbiter has thrown; _serve() already reported that error.
            logger.error("Gunicorn arbiter exited during health check. Watchdog exiting.")
            return
        # Plain timeout: the arbiter raised nothing.
        _post_error(
            "RuntimeError",
            (
                f"Server is not healthy at {health_url} after "
                f"{elapsed:.2f} seconds. It will be killed by the watchdog."
            ),
            None,
        )
        logger.info("Halting Gunicorn arbiter.")
        # Ask the arbiter to stop if it is still alive.
        arbiter.signal(signal.SIGTERM, inspect.currentframe())  # type: ignore

    def _guarded_watchdog() -> None:
        # Thread target: make sure an unexpected watchdog failure reaches the parent.
        try:
            _watchdog()
        except Exception as exc:
            logger.exception("Exception in watchdog thread.")
            _post_error(type(exc).__name__, str(exc), traceback.format_exc())

    watchdog_thread = threading.Thread(target=_guarded_watchdog, daemon=True)
    watchdog_thread.start()

    async def _serve() -> None:
        nonlocal runtime_error
        try:
            async with serve_context:
                arbiter.run()
        except Exception as exc:
            # Publish the failure to the watchdog first, then to the parent.
            runtime_error = exc
            _post_error(type(exc).__name__, str(exc), traceback.format_exc())
            logger.exception("Gunicorn server failed to start.")

    try:
        # Most exceptions are expected to be handled inside _serve().
        asyncio.run(_serve())
    finally:
        # Give the watchdog a bounded amount of time to wind down.
        watchdog_thread.join(timeout=5.0)
574
+
575
+
576
+ def _get_default_ipv4_address() -> str:
577
+ """Determine the default outbound IPv4 address for this machine.
578
+
579
+ Implementation:
580
+ Opens a UDP socket and "connects" to a public address to force route
581
+ selection, then inspects the socket's local address. No packets are sent.
582
+
583
+ Returns:
584
+ str: Best-guess IPv4 like `192.168.x.y`. Falls back to `127.0.0.1`.
585
+ """
586
+ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
587
+ try:
588
+ # Doesn't actually contact 8.8.8.8; just forces the OS to pick a route.
589
+ s.connect(("8.8.8.8", 80))
590
+ return s.getsockname()[0]
591
+ except Exception:
592
+ return "127.0.0.1"
593
+ finally:
594
+ s.close()
595
+
596
+
597
+ class PythonServerLauncher:
598
+ """Unified launcher for FastAPI, using uvicorn or gunicorn per mode/worker count.
599
+
600
+ See [`PythonServerLauncherArgs`][mantisdk.utils.server_launcher.PythonServerLauncherArgs] for configuration options.
601
+
602
+ Args:
603
+ app: The FastAPI app to launch.
604
+ args: The configuration for the server.
605
+ serve_context: An optional context manager to apply around the server startup.
606
+ """
607
+
608
+ def __init__(
609
+ self, app: FastAPI, args: PythonServerLauncherArgs, serve_context: Optional[AsyncContextManager[Any]] = None
610
+ ):
611
+ """Initialize the launcher with the FastAPI app, configuration, and optional serve context."""
612
+ self.app = app
613
+ self.args = args
614
+ self.serve_context = serve_context
615
+ self._host: Optional[str] = self.args.host
616
+ self._port: Optional[int] = self.args.port
617
+ self._access_host: Optional[str] = self.args.access_host
618
+ self.initialize()
619
+
620
def initialize(self):
    """Resolve host/port/access host and reset every runtime handle to its "not started" value."""
    # Resolve the addresses first so that later accessors return already-cached values.
    self._ensure_host()
    self._ensure_port()
    self._ensure_access_host()

    # Handles used by the in-process asyncio mode.
    self._uvicorn_server: Optional[uvicorn.Server] = None
    self._uvicorn_task: Optional[asyncio.Task[None]] = None  # assigned from run_uvicorn_asyncio()

    # Handles used by the thread mode.
    self._thread: Optional[threading.Thread] = None
    self._thread_event_queue: Optional[queue.Queue[ChildEvent]] = None
    self._thread_stop_event: Optional[threading.Event] = None

    # Handles used by the subprocess mode (uvicorn or gunicorn).
    self._proc: Optional[BaseProcess] = None
    self._mp_event_queue: Optional[multiprocessing.Queue[ChildEvent]] = None
    self._gunicorn_app: Optional[GunicornApp] = None  # programmatic gunicorn wrapper

    # Nothing is serving yet.
    self._is_running: bool = False
642
+
643
+ def __getstate__(self):
644
+ """Control pickling to prevent server state from being sent to subprocesses."""
645
+ return {
646
+ "app": self.app,
647
+ "args": self.args,
648
+ "serve_context": self.serve_context,
649
+ "_host": self._host,
650
+ "_port": self._port,
651
+ "_access_host": self._access_host,
652
+ }
653
+
654
def __setstate__(self, state: Dict[str, Any]):
    """Restore the pickled configuration fields, then rebuild runtime state with initialize()."""
    self.app = state["app"]
    self.args = cast(PythonServerLauncherArgs, state["args"])
    for field in ("serve_context", "_host", "_port", "_access_host"):
        setattr(self, field, state[field])
    # Runtime handles are not part of the pickled state; recreate them as "not started".
    self.initialize()
662
+
663
@property
def endpoint(self) -> str:
    """Return the `http://host:port` URL built from the resolved bind host and port."""
    bind_host = self._ensure_host()
    bind_port = self._ensure_port()
    return f"http://{bind_host}:{bind_port}"
667
+
668
@property
def access_endpoint(self) -> str:
    """Return the `http://host:port` URL built from the resolved access host and port.

    The access host is the address used to reach the server (for example for
    health checks), which can differ from the bind host such as `0.0.0.0`.
    """
    reachable_host = self._ensure_access_host()
    bind_port = self._ensure_port()
    return f"http://{reachable_host}:{bind_port}"
672
+
673
@property
def health_url(self) -> Optional[str]:
    """Return the absolute health-check URL, or None when no health-check path is configured.

    The configured path is appended to `access_endpoint`; a leading `/` is
    added when the path does not already start with one.
    """
    configured = self.args.healthcheck_url
    if not configured:
        return None
    suffix = configured if configured.startswith("/") else "/" + configured
    return f"{self.access_endpoint}{suffix}"
682
+
683
async def start(self):
    """Start the server with the routine matching `args.launch_mode`.

    Raises:
        ValueError: If the launch mode is not "mp", "thread" or "asyncio".
    """
    app_ref = self._normalize_app_ref(self.app)
    logger.info(f"Starting server {app_ref}...")

    mode = self.args.launch_mode
    if mode == "mp":
        starter = self._start_serving_process
    elif mode == "thread":
        starter = self._start_uvicorn_thread
    elif mode == "asyncio":
        starter = self._start_uvicorn_asyncio
    else:
        raise ValueError(f"Unsupported launch mode: {mode}")

    await starter()
    logger.info(f"Server {app_ref} started at {self.endpoint}")
696
+
697
async def stop(self):
    """Stop the server with the teardown routine matching `args.launch_mode`.

    Raises:
        ValueError: If the launch mode is not "mp", "thread" or "asyncio".
    """
    app_ref = self._normalize_app_ref(self.app)
    logger.info(f"Stopping server {app_ref}...")

    mode = self.args.launch_mode
    if mode == "mp":
        stopper = self._stop_serving_process
    elif mode == "thread":
        stopper = self._stop_uvicorn_thread
    elif mode == "asyncio":
        stopper = self._stop_uvicorn_asyncio
    else:
        raise ValueError(f"Unsupported launch mode: {mode}")

    await stopper()
    logger.info(f"Server {app_ref} stopped")
710
+
711
async def reload(self):
    """Restart the server: stop it first when it is currently running, then start it."""
    currently_running = self.is_running()
    if currently_running:
        await self.stop()
    await self.start()
716
+
717
async def run_forever(self):
    """Start the server in the configured mode and wait until it exits.

    On cancellation (or KeyboardInterrupt) while waiting, the server is
    stopped with the matching teardown routine and the exception is re-raised.

    Raises:
        ValueError: If the launch mode is not "asyncio", "thread" or "mp".
    """
    mode = self.args.launch_mode
    if mode not in ("asyncio", "thread", "mp"):
        raise ValueError(f"Unsupported launch mode: {mode}")

    if mode == "asyncio":
        await self._start_uvicorn_asyncio()
        try:
            if self._uvicorn_task is not None:
                # shield() keeps cancellation of this coroutine from directly
                # cancelling the server task; shutdown is handled below instead.
                await asyncio.shield(self._uvicorn_task)
        except (asyncio.CancelledError, KeyboardInterrupt):
            logger.warning("Server received cancellation signal. Shutting down gracefully.")
            await self._stop_uvicorn_asyncio()
            raise
        return

    if mode == "thread":
        await self._start_uvicorn_thread()
        try:
            # Poll until the server thread has exited.
            while self._thread and self._thread.is_alive():
                await asyncio.sleep(0.5)
        except (asyncio.CancelledError, KeyboardInterrupt):
            logger.warning("Server thread received cancellation signal. Shutting down gracefully.")
            await self._stop_uvicorn_thread()
            raise
        return

    # Remaining mode: "mp".
    await self._start_serving_process()
    try:
        # Poll until the server process has exited.
        while self._proc and self._proc.is_alive():
            await asyncio.sleep(0.5)
    except (asyncio.CancelledError, KeyboardInterrupt):
        logger.warning("Server process received cancellation signal. Shutting down gracefully.")
        await self._stop_serving_process()
        raise
756
+
757
def is_running(self) -> bool:
    """Report the launcher's running flag: set once a start succeeds, cleared by the stop routines."""
    return self._is_running
760
+
761
+ @staticmethod
762
+ def _normalize_app_ref(app: FastAPI) -> str:
763
+ module = getattr(app, "__module__", None)
764
+ if module and module != "__main__":
765
+ return f"{module}:app"
766
+ return "unknown:app"
767
+
768
+ def _ensure_host(self) -> str:
769
+ if self._host is None:
770
+ logger.warning("No host provided, using 0.0.0.0.")
771
+ self._host = "0.0.0.0"
772
+ return self._host
773
+
774
+ def _ensure_port(self) -> int:
775
+ if self._port is None:
776
+ logger.warning("No port provided, using pick_unused_port to pick a random unused port.")
777
+ self._port = pick_unused_port()
778
+ return self._port
779
+
780
+ def _ensure_access_host(self) -> str:
781
+ if self._access_host is None:
782
+ if self.args.access_host is None:
783
+ if self._ensure_host() in ("0.0.0.0", "::"):
784
+ # Probe host normalization for 0.0.0.0
785
+ logger.warning("No access host provided, using default outbound IPv4 address for this machine.")
786
+ self._access_host = _get_default_ipv4_address()
787
+ else:
788
+ logger.warning("No access host provided, using the host provided.")
789
+ self._access_host = self._ensure_host()
790
+ else:
791
+ self._access_host = self.args.access_host
792
+ return self._access_host # type: ignore
793
+
794
def _create_uvicorn_server(self) -> uvicorn.Server:
    """Build a uvicorn server for the app from the resolved host/port and the logging/keep-alive args."""
    settings = {
        "app": self.app,
        "host": self._ensure_host(),
        "port": self._ensure_port(),
        "log_level": self.args.log_level,
        "access_log": self.args.access_log,
        "loop": "asyncio",
        "timeout_keep_alive": self.args.timeout_keep_alive,
    }
    return uvicorn.Server(uvicorn.Config(**settings))
805
+
806
+ def _ctx(self) -> AsyncContextManager[Any]:
807
+ # Use the provided serve_context if any; otherwise a no-op async CM
808
+ if self.serve_context is None:
809
+ logger.info("No serve_context provided, using noop_context.")
810
+ return noop_context()
811
+ return self.serve_context
812
+
813
+ # --- Mode 1: asyncio (in-proc) using run_uvicorn_asyncio ---
814
+
815
async def _start_uvicorn_asyncio(self):
    """Start uvicorn through `run_uvicorn_asyncio` and keep the task it returns.

    Raises:
        RuntimeError: If the launcher is already marked as running.
    """
    if self.is_running():
        raise RuntimeError("Server is already running. Stopping it first.")

    logger.info("Starting uvicorn asyncio server...")
    server = self._create_uvicorn_server()
    self._uvicorn_server = server
    # wait_for_serve=False: get the serving task back instead of waiting for the server to finish.
    serving_task = await run_uvicorn_asyncio(
        uvicorn_server=server,
        serve_context=self._ctx(),
        timeout=self.args.startup_timeout,
        health_url=self.health_url,
        wait_for_serve=False,
        kill_unhealthy_server=self.args.kill_unhealthy_server,
    )
    self._uvicorn_task = serving_task
    self._is_running = True
    logger.info("Uvicorn asyncio server started")
832
+
833
async def _stop_uvicorn_asyncio(self):
    """Shut down the in-process uvicorn server task, if any, and clear the asyncio-mode state."""
    logger.info("Stopping uvicorn asyncio server...")
    server, serving_task = self._uvicorn_server, self._uvicorn_task
    if server and serving_task:
        await shutdown_uvicorn_server(server, serving_task)
    # Reset the handles whether or not a server was actually running.
    self._uvicorn_task = None
    self._uvicorn_server = None
    self._is_running = False
    logger.info("Uvicorn asyncio server stopped")
842
+
843
+ # --- Mode 2: thread (in-proc) using run_uvicorn_thread ---
844
+
845
async def _start_uvicorn_thread(self):
    """Run uvicorn in a daemon thread via `run_uvicorn_thread` and wait for its first event.

    The thread reports a `ChildEvent` on a queue; this coroutine waits for it
    for up to twice `args.startup_timeout`.

    Raises:
        RuntimeError: If the launcher is already marked as running, the thread
            reports an error event, or no event arrives within the wait.
    """
    if self.is_running():
        raise RuntimeError("Server is already running. Stopping it first.")

    logger.info("Starting uvicorn thread server...")
    server = self._create_uvicorn_server()
    self._uvicorn_server = server
    events = queue.Queue()
    self._thread_event_queue = events
    stop_signal = threading.Event()
    self._thread_stop_event = stop_signal

    worker = threading.Thread(
        target=run_uvicorn_thread,
        kwargs={
            "uvicorn_server": server,
            "serve_context": self._ctx(),
            "event_queue": events,
            "stop_event": stop_signal,
            "timeout": self.args.startup_timeout,
            "health_url": self.health_url,
        },
        daemon=True,
    )
    self._thread = worker
    worker.start()

    # Give the thread twice the startup timeout to deliver its ready/error event.
    wait_budget = self.args.startup_timeout * 2
    try:
        evt: ChildEvent = await asyncio.to_thread(events.get, True, wait_budget)
    except queue.Empty:
        if worker.is_alive():
            logger.error(
                "Threaded server failed to start and sends no event. This should not happen. Shutting down server."
            )
            await self._stop_uvicorn_thread()
            raise RuntimeError("Threaded server failed to start and sends no event. This should not happen.")
        raise RuntimeError("Threaded server failed to start and is not alive. No error event was received.")

    if evt.kind != "error":
        logger.info("Threaded server started successfully.")
        self._is_running = True
        return

    logger.error("Threaded server failed to start (%s): %s\n%s", evt.exc_type, evt.message, evt.traceback)
    # Wait for the failing thread to wind down before surfacing the error.
    await asyncio.to_thread(worker.join, self.args.thread_join_timeout)
    if worker.is_alive():
        logger.error("Threaded server failed to start and refused to shut down.")
    raise RuntimeError(evt.message)
890
+
891
async def _stop_uvicorn_thread(self):
    """Signal the server thread to stop, wait for it to exit and clear the thread-mode state.

    Raises:
        RuntimeError: If the thread is still alive after `args.thread_join_timeout`.
            The thread-mode state is left untouched in that case.
    """
    logger.info("Stopping uvicorn thread server...")
    stop_signal = self._thread_stop_event
    if stop_signal:
        stop_signal.set()

    worker = self._thread
    if not worker:
        logger.info("Uvicorn thread server was not running. Nothing to stop.")
    else:
        # join() blocks, so run it off the event loop.
        await asyncio.to_thread(worker.join, self.args.thread_join_timeout)
        if worker.is_alive():
            raise RuntimeError("Threaded server refused to shut down.")

    self._thread = None
    self._thread_event_queue = None
    self._thread_stop_event = None
    self._uvicorn_server = None
    self._is_running = False
    logger.info("Uvicorn thread server stopped")
908
+
909
+ # --- Mode 3: subprocess (uvicorn / gunicorn) using run_uvicorn_subprocess or run_gunicorn ---
910
+
911
async def _start_serving_process(self):
    """Launch the server in a forked child process and wait until it reports ready.

    With `args.n_workers > 1` the child runs Gunicorn (via `run_gunicorn`);
    otherwise it runs a single uvicorn server (via `run_uvicorn_subprocess`).
    The child reports a `ChildEvent` on a multiprocessing queue, and this
    coroutine waits for it for up to twice `args.startup_timeout`.

    On every startup failure the child process and the event queue are torn
    down through `_stop_serving_process()` before the error is raised, so a
    failed start does not leave a live child or an open queue behind.

    Raises:
        RuntimeError: If the launcher is already marked as running, the "fork"
            start method is unavailable, the child reports an error event, or
            no event arrives within the wait.
    """
    if self.is_running():
        raise RuntimeError("Server process is already running. Stopping it first.")

    host = self._ensure_host()
    port = self._ensure_port()

    try:
        ctx = multiprocessing.get_context("fork")
    except ValueError as e:
        raise RuntimeError(
            "Process launch requires 'fork' start method (Linux/macOS). "
            "On Windows, use 'thread' or 'asyncio' modes."
        ) from e
    self._mp_event_queue = ctx.Queue()

    use_gunicorn = self.args.n_workers > 1

    async def _abort_startup() -> None:
        # Best-effort teardown after a failed start: terminate a lingering child,
        # close the event queue and reset the subprocess-mode state.
        try:
            await self._stop_serving_process()
        except Exception:
            logger.exception("Error cleaning up server process after failed startup.")

    if use_gunicorn:
        # Gunicorn path when n_workers > 1
        logger.info("Starting Gunicorn server...")
        log_level = self.args.log_level
        # logging.getLevelName() maps a numeric level to its name but maps a known
        # level *name* back to a number, so only numeric levels are converted.
        level_name = log_level if isinstance(log_level, str) else logging.getLevelName(log_level)
        options = {
            "bind": f"{host}:{port}",
            "workers": int(self.args.n_workers),
            "worker_class": "uvicorn_worker.UvicornWorker",
            "loglevel": level_name.lower(),
            "accesslog": "-" if self.args.access_log else None,
            "errorlog": "-",
            "preload_app": True,
            # Allow half the join timeout for graceful shutdown.
            "graceful_timeout": int(self.args.process_join_timeout / 2),
        }
        if "PROMETHEUS_MULTIPROC_DIR" in os.environ:
            from mantisdk.utils.metrics import shutdown_metrics

            options["child_exit"] = shutdown_metrics  # type: ignore

        self._gunicorn_app = GunicornApp(self.app, options)

        self._proc = ctx.Process(
            target=run_gunicorn,
            kwargs={
                "gunicorn_app": self._gunicorn_app,
                "serve_context": self._ctx(),
                "event_queue": self._mp_event_queue,
                "timeout": self.args.startup_timeout,
                "health_url": self.health_url,
            },
            daemon=False,
        )
        self._proc.start()

    else:
        # Single-worker subprocess uvicorn
        logger.info("Starting uvicorn subprocess server...")
        self._uvicorn_server = self._create_uvicorn_server()

        self._proc = ctx.Process(
            target=run_uvicorn_subprocess,
            kwargs={
                "uvicorn_server": self._uvicorn_server,
                "serve_context": self._ctx(),
                "event_queue": self._mp_event_queue,
                "timeout": self.args.startup_timeout,
                "health_url": self.health_url,
            },
            daemon=True,
        )
        self._proc.start()

    # Wait for the ready or error event from the child process, allowing twice
    # the startup timeout for the event to arrive.
    timeout = self.args.startup_timeout * 2
    try:
        evt: ChildEvent = await asyncio.to_thread(self._mp_event_queue.get, True, timeout)
    except queue.Empty:
        if not self._proc.is_alive():
            # The child is gone; still close the queue and reset state.
            await _abort_startup()
            raise RuntimeError("Server process failed to start and is not alive. No error event was received.")
        logger.error(
            "Server process failed to start and sends no event. This should not happen. Shutting down server."
        )
        await self._stop_serving_process()
        raise RuntimeError("Server process failed to start and sends no event. This should not happen.")

    if evt.kind == "error":
        logger.error(
            "Server process (%s) failed to start (%s): %s\n%s",
            "gunicorn" if use_gunicorn else "uvicorn",
            evt.exc_type,
            evt.message,
            evt.traceback,
        )
        # Give the child a chance to exit on its own before forcing it.
        await asyncio.to_thread(self._proc.join, self.args.process_join_timeout)
        if self._proc.is_alive():
            logger.error("Server process failed to start and refused to shut down.")
        # Terminate a lingering child (if any), close the queue and reset state.
        await _abort_startup()
        raise RuntimeError(evt.message)
    else:
        logger.info("Subprocess server started successfully.")
        self._is_running = True
1008
+
1009
async def _stop_serving_process(self):
    """Stop the child process (SIGTERM, then SIGKILL if needed), close the event queue and reset state.

    Raises:
        RuntimeError: If the process is still alive after both signals. The
            queue and the subprocess-mode state are left untouched in that case.
    """
    logger.info("Stopping subprocess server...")
    proc = self._proc
    if proc is None:
        logger.info("Subprocess server was not running. Nothing to stop.")
    else:
        if proc.is_alive():
            # Graceful attempt first: SIGTERM, then wait up to the join timeout.
            try:
                proc.terminate()
            except Exception:
                logger.exception("Error sending SIGTERM to server process.")
            await asyncio.to_thread(proc.join, self.args.process_join_timeout)

        if proc.is_alive():
            # The graceful attempt did not work: SIGKILL with a fixed wait.
            try:
                proc.kill()
            except Exception:
                logger.exception("Error sending SIGKILL to server process.")
            await asyncio.to_thread(proc.join, 5.0)

        if proc.is_alive():
            raise RuntimeError("Server process failed to shut down after SIGTERM and SIGKILL.")

    events = self._mp_event_queue
    if events is not None:
        events.close()
        try:
            events.join_thread()
        except Exception:
            logger.exception("Error joining event queue thread.")

    self._proc = None
    self._mp_event_queue = None
    self._gunicorn_app = None
    self._uvicorn_server = None
    self._is_running = False
    logger.info("Subprocess server stopped")