mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
mantisdk/server.py ADDED
@@ -0,0 +1,401 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ """Legacy HTTP server compatible with the original Mantisdk protocol.
4
+
5
+ The implementation in this module predates the modern store-powered runtime and
6
+ is kept for backwards compatibility with older deployments. New applications
7
+ should migrate to the store architecture where possible.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import logging
14
+ import threading
15
+ import time
16
+ import uuid
17
+ import warnings
18
+ from contextlib import asynccontextmanager
19
+ from typing import Any, Dict, List, Literal, Optional
20
+
21
+ import uvicorn
22
+ from fastapi import FastAPI, HTTPException, Path
23
+
24
+ from .types import (
25
+ GenericResponse,
26
+ NamedResources,
27
+ ResourcesUpdate,
28
+ RolloutLegacy,
29
+ Task,
30
+ TaskIfAny,
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
class ServerDataStore:
    """In-memory container for the legacy server's state.

    The store tracks queued tasks, claimed tasks, uploaded rollouts, and the
    published resource bundles. Updates to the claimed-task and rollout tables
    are serialised by ``_results_lock`` and updates to the resource table by
    ``_resources_lock``. Both are ``asyncio`` locks: they coordinate coroutines
    running on a single event loop and do not make the store safe to share
    between threads.

    !!! warning "Deprecated"
        [`ServerDataStore`][mantisdk.server.ServerDataStore] is part of
        the legacy client/server stack. Use [`LightningStore`][mantisdk.LightningStore] instead.
    """

    def __init__(self):
        self._task_queue: asyncio.Queue[Task] = asyncio.Queue()
        self._processing_tasks: Dict[str, Task] = {}  # Currently processing tasks
        self._completed_rollouts: Dict[str, RolloutLegacy] = {}

        # Store for versioned resources
        self._resource_versions: Dict[str, NamedResources] = {}
        self._latest_resources_id: Optional[str] = None

        # asyncio locks: coroutine-safe on one event loop, not thread-safe.
        self._results_lock = asyncio.Lock()
        self._resources_lock = asyncio.Lock()

    async def add_task(
        self,
        sample: Any,
        mode: Literal["train", "val", "test"] | None = None,
        resources_id: str | None = None,
        metadata: Dict[str, Any] | None = None,
    ) -> str:
        """Enqueue a new task and return the generated rollout identifier.

        Args:
            sample: Payload that describes the task input.
            mode: Phase in which the sample should be executed (`"train"`, `"val"`, or
                `"test"`).
            resources_id: Identifier of a resource bundle that the executor should
                load before running the task.
            metadata: Optional metadata forwarded to the executor.

        Returns:
            Unique rollout identifier assigned to the task.
        """
        rollout_id = f"rollout-{uuid.uuid4()}"
        task = Task(
            rollout_id=rollout_id,
            input=sample,
            mode=mode,
            resources_id=resources_id,
            create_time=time.time(),
            num_claims=0,
            metadata=metadata or {},
        )
        await self._task_queue.put(task)
        logger.info(f"Task queued: {rollout_id} (mode: {mode}, resources_id: {resources_id})")
        return rollout_id

    async def get_next_task(self) -> Optional[Task]:
        """Retrieve the next task from the queue without blocking.

        The returned task is a copy of the queued one with ``last_claim_time``
        set to the current time and ``num_claims`` incremented; the copy is
        also recorded in the processing table.

        Returns:
            Next [`Task`][mantisdk.Task] ready to execute, or ``None``
            when the queue is empty.
        """
        async with self._results_lock:
            try:
                task = self._task_queue.get_nowait()
            except asyncio.QueueEmpty:
                return None
            task = task.model_copy(
                update={
                    "last_claim_time": time.time(),
                    "num_claims": (task.num_claims or 0) + 1,
                }
            )
            self._processing_tasks[task.rollout_id] = task
            if task.num_claims == 1:
                logger.debug(f"Next task retrieved: {task.rollout_id}")
            else:
                logger.info(f"Task {task.rollout_id} re-claimed (attempt {task.num_claims})")
            return task

    async def update_resources(self, update: ResourcesUpdate):
        """Persist a new resource bundle and mark it as the latest version.

        Args:
            update: Resource payload received from a client.
        """
        # TODO: evict old resources if necessary.
        async with self._resources_lock:
            self._resource_versions[update.resources_id] = update.resources
            self._latest_resources_id = update.resources_id
            logger.info(f"Resources updated. New version '{update.resources_id}' is now latest.")

    async def get_resources_by_id(self, resources_id: str) -> Optional[ResourcesUpdate]:
        """Retrieve a specific resource bundle by identifier.

        Only the resource mapping is stored, so the returned object is rebuilt
        on every call: its ``create_time`` and ``update_time`` are the time of
        the lookup and ``version`` is always ``1``.

        Args:
            resources_id: Identifier that was previously published to the store.

        Returns:
            Matching [`ResourcesUpdate`][mantisdk.ResourcesUpdate]
            instance, or ``None`` when the identifier is unknown.
        """
        async with self._resources_lock:
            resources = self._resource_versions.get(resources_id)
            # Compare against None explicitly: an empty bundle is falsy but is
            # still a published version and must not be reported as missing.
            if resources is None:
                return None
            return ResourcesUpdate(
                resources_id=resources_id,
                resources=resources,
                create_time=time.time(),
                update_time=time.time(),
                version=1,
            )

    async def get_latest_resources(self) -> Optional[ResourcesUpdate]:
        """Return the most recent resource bundle, if one exists."""
        latest_id = self._latest_resources_id
        if latest_id is None:
            return None
        return await self.get_resources_by_id(latest_id)

    async def store_rollout(self, rollout: RolloutLegacy):
        """Persist a completed rollout for later inspection.

        The rollout's task is also dropped from the processing table.

        Args:
            rollout: Rollout returned by a client.
        """
        async with self._results_lock:
            self._processing_tasks.pop(rollout.rollout_id, None)
            self._completed_rollouts[rollout.rollout_id] = rollout
            logger.info(f"Rollout received and stored: {rollout.rollout_id}")

    async def retrieve_rollout(self, rollout_id: str) -> Optional[RolloutLegacy]:
        """Retrieve and remove a stored rollout by identifier.

        Args:
            rollout_id: Identifier of the rollout to fetch.

        Returns:
            Stored [`RolloutLegacy`][mantisdk.RolloutLegacy], or ``None``
            when the identifier is unknown.
        """
        async with self._results_lock:
            return self._completed_rollouts.pop(rollout_id, None)

    async def retrieve_completed_rollouts(self) -> List[RolloutLegacy]:
        """Return all completed rollouts and clear the internal buffer."""
        async with self._results_lock:
            rollouts = list(self._completed_rollouts.values())
            self._completed_rollouts.clear()
            return rollouts

    def get_processing_tasks(self) -> Dict[str, Task]:
        """Return a copy of currently processing tasks for timeout checking."""
        return self._processing_tasks.copy()

    async def requeue_task(self, task: Task):
        """Requeue a task that timed out while being processed.

        Args:
            task: Task to remove from the processing table and put back on the
                queue.
        """
        logger.warning(f"Requeuing task {task.rollout_id} after timeout (attempt {task.num_claims})")
        async with self._results_lock:
            # Remove from processing tasks
            self._processing_tasks.pop(task.rollout_id, None)
            self._task_queue.put_nowait(task)
202
+
203
+
204
class MantisdkServer:
    """High-level controller for the legacy Mantisdk FastAPI server.

    The controller orchestrates server start-up, task queueing, resource updates,
    and retrieval of client rollouts. It is primarily used by existing systems that
    still rely on the HTTP-based workflow.

    The backing [`ServerDataStore`][mantisdk.server.ServerDataStore] is created
    when the FastAPI lifespan starts and discarded when it ends, so the
    queueing and retrieval helpers raise ``RuntimeError`` while the server is
    not running.

    !!! warning "Deprecated"
        [`MantisdkServer`][mantisdk.server.MantisdkServer] is part of
        the legacy client/server stack. Prefer the store-based runtime for new
        integrations.
    """

    # Seconds between two successive checks in `poll_completed_rollout`.
    _POLL_INTERVAL_SECONDS = 1.0

    def __init__(self, host: str = "127.0.0.1", port: int = 8000, task_timeout_seconds: float = 300.0):
        """Initialize the controller.

        Args:
            host: Hostname or IP address to bind the HTTP server to.
            port: TCP port exposed by the server.
            task_timeout_seconds: Seconds before a claimed task is considered stale and
                re-queued.
        """
        # stacklevel=2 attributes the warning to the code that instantiates the
        # server rather than to this constructor.
        warnings.warn(
            "MantisdkServer is deprecated. Please use LightningStoreServer instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        self.host = host
        self.port = port
        self.endpoint = f"http://{host}:{port}"
        self._task_timeout_seconds = task_timeout_seconds

        # Defer initialization and use event for cross-thread communication
        self._store: Optional[ServerDataStore] = None
        self.loop: Optional[asyncio.AbstractEventLoop] = None
        self.startup_event = threading.Event()

        # Strong reference to the background task created by `start()`.
        self._serve_task: Optional[asyncio.Task[None]] = None

        # Create FastAPI app instance with a lifespan manager
        self._app = FastAPI(lifespan=self._lifespan)
        self._setup_routes()

        self._uvicorn_config = uvicorn.Config(self._app, host=self.host, port=self.port, log_level="info")
        self._uvicorn_server = uvicorn.Server(self._uvicorn_config)

    @asynccontextmanager
    async def _lifespan(self, app: FastAPI):
        """Manage server start-up and shutdown within the event loop."""
        logger.info("Server is starting up...")
        self.loop = asyncio.get_running_loop()
        self._store = ServerDataStore()  # Initialize data store here
        self.startup_event.set()  # Signal that the server is ready

        try:
            yield
        finally:
            # Reset the state even when the lifespan body exits with an error,
            # so the controller never keeps reporting a dead server as ready.
            logger.info("Server is shutting down.")
            self._store = None
            self.startup_event.clear()  # Clear the startup event
            self.loop = None

    async def _check_and_requeue_stale_tasks(self):
        """Check for stale tasks and requeue them when they exceed the timeout."""
        current_time = time.time()
        store = self._store
        # Ensure store is initialized before checking
        if store is None:
            return

        # Iterate over a snapshot: requeue_task mutates the live table.
        for task in store.get_processing_tasks().values():
            if task.last_claim_time is None:
                continue
            if current_time - task.last_claim_time > self._task_timeout_seconds:
                await store.requeue_task(task)
                logger.warning(
                    f"Task {task.rollout_id} timed out after {self._task_timeout_seconds}s, requeued (attempt {task.num_claims})"
                )

    def _setup_routes(self):
        """Configure the FastAPI routes that make up the legacy HTTP API."""

        @self._app.get("/task", response_model=TaskIfAny)
        async def next_task() -> TaskIfAny:  # type: ignore
            """Provide the next available task to a client."""
            await self._check_and_requeue_stale_tasks()

            if self._store is None:
                return TaskIfAny(is_available=False)

            task = await self._store.get_next_task()
            if task:
                logger.debug(f"Serving task {task.rollout_id} to a client.")
                return TaskIfAny(is_available=True, task=task)
            else:
                logger.debug("No task available for client.")
                return TaskIfAny(is_available=False)

        @self._app.get("/resources/latest", response_model=ResourcesUpdate)
        async def fetch_latest_resources() -> ResourcesUpdate:  # type: ignore
            """Return the most recent resource bundle published to the server."""
            if self._store is None:
                raise HTTPException(status_code=503, detail="Server not fully initialized.")
            resources_update = await self._store.get_latest_resources()
            if resources_update is None:
                raise HTTPException(status_code=404, detail="No resources have been set on the server.")
            logger.debug(f"Serving latest resources '{resources_update.resources_id}' to a client.")
            return resources_update

        @self._app.get("/resources/{resource_id}", response_model=ResourcesUpdate)
        async def fetch_resources_by_id(  # type: ignore
            resource_id: str = Path(..., description="The unique identifier for the resource version.")
        ) -> ResourcesUpdate:
            """Return a specific version of resources by identifier."""
            if self._store is None:
                raise HTTPException(status_code=503, detail="Server not fully initialized.")
            resources_update = await self._store.get_resources_by_id(resource_id)
            if resources_update is None:
                raise HTTPException(status_code=404, detail=f"Resource ID '{resource_id}' not found.")
            logger.debug(f"Serving resources for ID '{resource_id}' to a client.")
            return resources_update

        @self._app.post("/rollout", response_model=GenericResponse)
        async def post_rollout(payload: RolloutLegacy) -> GenericResponse:  # type: ignore
            """Persist the rollout reported by a client."""
            if self._store is None:
                raise HTTPException(status_code=503, detail="Server not fully initialized.")
            await self._store.store_rollout(payload)
            return GenericResponse(
                status="ok",
                message=f"Rollout {payload.rollout_id} received and stored.",
            )

    async def start(self):
        """Start the FastAPI server in the background."""
        logger.info(f"Starting server at {self.endpoint}")
        # Keep a strong reference: the event loop only holds weak references to
        # tasks, so an unreferenced serve task could be garbage-collected.
        self._serve_task = asyncio.create_task(self._uvicorn_server.serve())
        await asyncio.sleep(1)  # Allow time for server to start up.

    async def stop(self):
        """Stop the FastAPI server and wait for a graceful shutdown."""
        if self._uvicorn_server.started:
            logger.info("Stopping server...")
            self._uvicorn_server.should_exit = True
            await asyncio.sleep(1)  # Allow time for graceful shutdown.
            logger.info("Server stopped.")

    async def run_forever(self):
        """Run the server indefinitely until `stop()` is invoked."""
        await self._uvicorn_server.serve()

    async def queue_task(
        self,
        sample: Any,
        mode: Literal["train", "val", "test"] | None = None,
        resources_id: str | None = None,
        metadata: Dict[str, Any] | None = None,
    ) -> str:
        """Add a task to the queue for a client to process.

        Raises:
            RuntimeError: If the store has not been initialized.
        """
        if self._store is None:
            raise RuntimeError("Store not initialized. The server may not be running.")
        return await self._store.add_task(sample, mode=mode, resources_id=resources_id, metadata=metadata)

    async def update_resources(self, resources: NamedResources) -> str:
        """Publish a new resource bundle and return its generated identifier.

        Raises:
            RuntimeError: If the store has not been initialized.
        """
        if self._store is None:
            raise RuntimeError("Store not initialized. The server may not be running.")
        resources_id = f"res-{uuid.uuid4()}"
        now = time.time()
        update = ResourcesUpdate(
            resources_id=resources_id, resources=resources, create_time=now, update_time=now, version=1
        )
        await self._store.update_resources(update)
        return resources_id

    async def get_completed_rollout(self, rollout_id: str) -> Optional[RolloutLegacy]:
        """Retrieve a specific completed rollout by identifier.

        Raises:
            RuntimeError: If the store has not been initialized.
        """
        if self._store is None:
            raise RuntimeError("Store not initialized. The server may not be running.")
        return await self._store.retrieve_rollout(rollout_id)

    async def poll_completed_rollout(self, rollout_id: str, timeout: Optional[float] = None) -> Optional[RolloutLegacy]:
        """Poll for a completed rollout until it becomes available or a timeout expires.

        The store is always checked at least once, including when ``timeout``
        is zero or negative.

        Args:
            rollout_id: Identifier of the rollout to wait for.
            timeout: Maximum number of seconds to wait. ``None`` waits indefinitely.

        Returns:
            Retrieved rollout, or ``None`` when the timeout is reached without success.

        Raises:
            RuntimeError: If the store has not been initialized.
        """
        # A monotonic clock keeps the deadline immune to wall-clock adjustments.
        # `timeout is None` (not falsiness) selects the unbounded wait, so a
        # timeout of 0 means "check once" instead of "wait forever".
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            rollout = await self.get_completed_rollout(rollout_id)
            if rollout is not None:
                return rollout
            if deadline is None:
                await asyncio.sleep(self._POLL_INTERVAL_SECONDS)
                continue
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return None
            # Never sleep past the deadline, so short timeouts are honoured.
            await asyncio.sleep(min(self._POLL_INTERVAL_SECONDS, remaining))

    async def retrieve_completed_rollouts(self) -> List[RolloutLegacy]:
        """Return every completed rollout and clear the internal buffer.

        Raises:
            RuntimeError: If the store has not been initialized.
        """
        if self._store is None:
            raise RuntimeError("Store not initialized. The server may not be running.")
        return await self._store.retrieve_completed_rollouts()
mantisdk/store/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from .base import LightningStore, LightningStoreCapabilities, LightningStoreStatistics
4
+ from .client_server import LightningStoreClient, LightningStoreServer
5
+ from .collection_based import CollectionBasedLightningStore
6
+ from .insight import InsightLightningStore, InsightTracker
7
+ from .listener import StorageListener
8
+ from .memory import InMemoryLightningStore
9
+ from .threading import LightningStoreThreaded
10
+
11
+ __all__ = [
12
+ "LightningStore",
13
+ "LightningStoreCapabilities",
14
+ "LightningStoreStatistics",
15
+ "LightningStoreClient",
16
+ "LightningStoreServer",
17
+ "InMemoryLightningStore",
18
+ "InsightLightningStore",
19
+ "InsightTracker",
20
+ "StorageListener",
21
+ "CollectionBasedLightningStore",
22
+ "LightningStoreThreaded",
23
+ ]