mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
mantisdk/store/base.py ADDED
@@ -0,0 +1,897 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, TypedDict, TYPE_CHECKING
6
+
7
+ from opentelemetry.sdk.trace import ReadableSpan
8
+
9
+ from mantisdk.types import (
10
+ Attempt,
11
+ AttemptedRollout,
12
+ AttemptStatus,
13
+ EnqueueRolloutRequest,
14
+ NamedResources,
15
+ ResourcesUpdate,
16
+ Rollout,
17
+ RolloutConfig,
18
+ RolloutMode,
19
+ RolloutStatus,
20
+ Span,
21
+ TaskInput,
22
+ Worker,
23
+ WorkerStatus,
24
+ )
25
+
26
+ if TYPE_CHECKING:
27
+ from .listener import StorageListener
28
+
29
+
30
def is_queuing(rollout: Rollout) -> bool:
    """Tell whether a rollout is still waiting to be picked up.

    Args:
        rollout: Rollout whose `status` is inspected.

    Returns:
        `True` when the status is `"queuing"` or `"requeuing"`.
    """
    return rollout.status in ("queuing", "requeuing")
32
+
33
+
34
def is_running(rollout: Rollout) -> bool:
    """Tell whether a rollout is currently being worked on.

    Args:
        rollout: Rollout whose `status` is inspected.

    Returns:
        `True` when the status is `"preparing"` or `"running"`.
    """
    return rollout.status in ("preparing", "running")
36
+
37
+
38
def is_finished(rollout: Rollout) -> bool:
    """Tell whether a rollout has reached a terminal status.

    Args:
        rollout: Rollout whose `status` is inspected.

    Returns:
        `True` when the status is `"failed"`, `"succeeded"` or `"cancelled"`.
    """
    return rollout.status in ("failed", "succeeded", "cancelled")
40
+
41
+
42
class _UnsetType:
    """Sentinel type used to mark a value as "not set"."""

    __slots__ = ()

    def __reduce__(self):
        # Rebuild through `_get_unset`, which returns the module-level
        # `UNSET` object rather than constructing a new instance.
        return (_get_unset, ())

    def __repr__(self) -> str:
        return "UNSET"


def _get_unset() -> _UnsetType:
    """Return the module-level `UNSET` sentinel (the callable used by `__reduce__`)."""
    return UNSET


UNSET = _UnsetType()  # The sentinel instance itself.
Unset = _UnsetType  # Alias for convenience
60
+
61
+
62
class LightningStoreCapabilities(TypedDict, total=False):
    """Feature flags describing what a LightningStore implementation supports.

    Every key is optional, and a missing key is to be read as false.
    """

    thread_safe: bool
    """True when the store is thread-safe."""
    async_safe: bool
    """True when the store is async-safe."""
    zero_copy: bool
    """True when a single copy of the store is shared by all threads/processes."""
    otlp_traces: bool
    """True when the store supports OTLP/HTTP traces."""
76
+
77
+
78
class LightningStoreStatistics(TypedDict, total=False):
    """Counters and gauges reported by a LightningStore implementation.

    Every key is optional.
    """

    name: str
    """Name of the store implementation."""
    total_rollouts: int
    """Number of rollouts held by the store."""
    total_attempts: int
    """Number of attempts held by the store."""
    total_spans: int
    """Number of spans held by the store."""
    total_resources: int
    """Number of resources held by the store."""
    total_workers: int
    """Number of workers held by the store."""
    uptime: float
    """Uptime since the store was started."""

    # Memory-related statistics
    total_span_bytes: int
    """Number of bytes taken by spans in the store."""
    eviction_threshold_bytes: int
    """Eviction threshold for spans, in bytes."""
    safe_threshold_bytes: int
    """Safe threshold for spans, in bytes."""
    memory_capacity_bytes: int
    """Memory capacity of the store, in bytes."""
105
+
106
+
107
+ class LightningStore:
108
+ """Contract for the persistent control-plane that coordinates training rollouts.
109
+
110
+ A `LightningStore` mediates every interaction between algorithms and runners:
111
+
112
+ - **Rollout lifecycle:** accept new rollouts, queue them for execution, create attempts,
113
+ and drive the rollout status machine (`"queuing"` → `"preparing"` → `"running"` →
114
+ `{"succeeded","failed","cancelled"}` or `"requeuing"` when a retry is justified).
115
+ - **Attempt tracking:** record each execution attempt, including progress heartbeats,
116
+ retry sequencing, and terminal states such as `"timeout"` or `"unresponsive"`.
117
+ - **Span ingest:** capture structured telemetry emitted by runners (either as native
118
+ [`Span`][mantisdk.Span] objects or as `opentelemetry.sdk.trace.ReadableSpan`
119
+ instances) so that algorithms can reconstruct trajectories and rewards.
120
+ - **Resource versioning:** manage immutable snapshots of named resources
121
+ (prompt templates, model checkpoints, proxy endpoints, …) and expose a single
122
+ "latest" snapshot that runners can fetch just after claiming work.
123
+
124
+ Implementations must provide thread-safe/async-safe semantics: each coroutine should
125
+ appear atomic to callers even when multiple algorithms or runners call the API concurrently.
126
+ Unless stated otherwise, missing identifiers should result in a `ValueError`.
127
+ """
128
+
129
+ def __init__(self, listeners: Optional[Sequence[StorageListener]] = None):
130
+ self.listeners: Sequence[StorageListener] = listeners or []
131
+
132
@property
def capabilities(self) -> LightningStoreCapabilities:
    """Return the capabilities of the store.

    The base store reports every capability as `False`; the `capabilities`
    mapping of each listener is then merged on top, in listener order, so a
    later listener overrides the keys set by an earlier one.
    """
    merged = LightningStoreCapabilities(
        thread_safe=False,
        async_safe=False,
        zero_copy=False,
        otlp_traces=False,
    )
    # Layer the listeners' capabilities over the all-false defaults.
    for attached in self.listeners:
        merged.update(attached.capabilities)
    return merged
147
+
148
async def statistics(self) -> LightningStoreStatistics:
    """Return the statistics of the store.

    The base implementation reports only `name`, set to the name of the
    concrete class.
    """
    store_name = self.__class__.__name__
    return {"name": store_name}
153
+
154
def otlp_traces_endpoint(self) -> str:
    """Return the OTLP/HTTP traces endpoint of the store.

    Traces may carry the rollout ID and attempt ID (and optionally the
    sequence ID) in the "resource" of the spans. A store that supports OTLP
    should be able to receive such traces and save them via
    [`add_span`][mantisdk.LightningStore.add_span] or
    [`add_otel_span`][mantisdk.LightningStore.add_otel_span].

    The endpoint should be compatible with the
    [OTLP HTTP protocol](https://opentelemetry.io/docs/specs/otlp/).
    It is not necessarily compatible with the OTLP gRPC protocol.

    The returned endpoint will usually end with `/v1/traces`.

    Returns:
        The first non-empty endpoint reported by a listener, in listener order.

    Raises:
        NotImplementedError: When no listener reports an endpoint.
    """
    # Listeners are asked lazily, so nothing after the first hit is queried.
    candidates = (listener.otlp_traces_endpoint() for listener in self.listeners)
    found = next((endpoint for endpoint in candidates if endpoint), None)
    if found is None:
        raise NotImplementedError()
    return found
174
+
175
def get_otlp_headers(self) -> Dict[str, str]:
    """Return the authentication headers for OTLP export.

    Returns:
        The first non-empty header mapping reported by a listener, in
        listener order, or an empty dict when no listener reports one.
    """
    # Listeners are asked lazily, so nothing after the first hit is queried.
    candidates = (listener.get_otlp_headers() for listener in self.listeners)
    return next((headers for headers in candidates if headers), {})
183
+
184
def complete_job(self, summary: Optional[Dict[str, Any]] = None) -> None:
    """Mark the job as complete, optionally attaching a summary.

    Every listener exposing a `complete` attribute (e.g., InsightTracker) is
    called with `summary` so that it can send the job.completed event.
    Listeners without that attribute are skipped.

    Args:
        summary: Optional dictionary containing job summary data (e.g., GEPA results).
    """
    for listener in self.listeners:
        if not hasattr(listener, "complete"):
            continue
        listener.complete(summary)
196
+
197
async def start_rollout(
    self,
    input: TaskInput,
    mode: RolloutMode | None = None,
    resources_id: str | None = None,
    config: RolloutConfig | None = None,
    metadata: Dict[str, Any] | None = None,
    worker_id: str | None = None,
) -> AttemptedRollout:
    """Create a rollout together with its first attempt in one step.

    !!! note
        Callers that only want to submit work for later scheduling should use
        [`enqueue_rollout()`][mantisdk.LightningStore.enqueue_rollout] instead.

    The rollout has to be stored with `status="preparing"` along with an initial
    attempt whose `sequence_id == 1`, so that the caller can start executing
    without going through the public queue. Implementations are expected to:

    1. Produce a unique `rollout_id` and `attempt_id`.
    2. Stamp `start_time` on both the rollout and the attempt from the current clock.
    3. Copy `config` and `metadata` so that later mutations do not leak shared references.
    4. Fall back to the latest resource snapshot when `resources_id` is `None`.

    Args:
        input: Arbitrary task payload supplied by an algorithm.
        mode: Optional semantic mode for downstream analytics (`"train"`, `"val"`, `"test"`).
        resources_id: Concrete resource snapshot to execute against; defaults to the latest stored snapshot.
        config: Rollout retry/timeout policy. Should default to a fresh [`RolloutConfig`][mantisdk.RolloutConfig].
        metadata: Free-form metadata persisted verbatim with the rollout.
        worker_id: Optional worker identifier to associate the new attempt with.

    Returns:
        The fully-populated [`AttemptedRollout`][mantisdk.AttemptedRollout], including
        the attempt that was just created.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must provide
            durable storage for the rollout.
        ValueError: Implementations should raise when `resources_id` does not exist.
    """
    raise NotImplementedError()
238
+
239
async def enqueue_rollout(
    self,
    input: TaskInput,
    mode: Literal["train", "val", "test"] | None = None,
    resources_id: str | None = None,
    config: RolloutConfig | None = None,
    metadata: Dict[str, Any] | None = None,
) -> Rollout:
    """Store a rollout in `queuing` state for a runner to claim later.

    !!! note
        Unlike [`start_rollout()`][mantisdk.LightningStore.start_rollout], this
        method is for callers that only want to submit work for later scheduling.

    Implementations must generate a unique `rollout_id`, stamp `start_time` with
    the current time, default `config` to a fresh [`RolloutConfig`][mantisdk.RolloutConfig],
    and append the rollout to the tail of the scheduling queue. No attempt is
    created at this point.

    Args:
        input: Arbitrary task payload supplied by an algorithm.
        mode: Optional semantic mode indicator (`"train"`, `"val"`, `"test"`).
        resources_id: Resource snapshot used when a runner eventually executes the rollout.
        config: Fine-grained retry/timeout parameters to persist with the rollout.
        metadata: Free-form metadata stored verbatim with the rollout record.

    Returns:
        The stored [`Rollout`][mantisdk.Rollout] in `queuing` status.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must persist the rollout.
        ValueError: Implementations should raise when `resources_id` does not exist.
    """
    raise NotImplementedError()
272
+
273
async def enqueue_many_rollouts(self, rollouts: Sequence[EnqueueRolloutRequest]) -> Sequence[Rollout]:
    """Store several rollouts in `queuing` state.

    An implementation may delegate to
    [`enqueue_rollout()`][mantisdk.LightningStore.enqueue_rollout] once per request
    while preserving the input ordering. Subclasses can override this method to
    provide more efficient bulk enqueue semantics.

    Args:
        rollouts: Rollout submission payloads mirroring the parameters of
            [`enqueue_rollout()`][mantisdk.LightningStore.enqueue_rollout].
            Each entry requires `input` and can optionally include other fields.

    Returns:
        Rollouts enqueued in the same order as `rollouts`.

    Raises:
        NotImplementedError: Always, in this base class.
    """
    raise NotImplementedError()
288
+
289
async def dequeue_rollout(self, worker_id: Optional[str] = None) -> Optional[AttemptedRollout]:
    """Claim the oldest queued rollout and move it to `preparing`.

    This call does not block.

    Retrieval must be FIFO across the rollouts that are still in `queuing` or
    `requeuing` state. When a rollout is claimed, implementations must:

    * Transition its status to `"preparing"`.
    * Create a new attempt with `status="preparing"` and a `sequence_id` equal to
      the number of attempts already registered for the rollout plus one.
    * Return an [`AttemptedRollout`][mantisdk.AttemptedRollout] snapshot so the
      runner knows both the rollout metadata and the attempt identifier.
    * Optionally refresh the caller's [`Worker`][mantisdk.Worker] telemetry
      (e.g., `last_dequeue_time`) when `worker_id` is provided.

    Args:
        worker_id: Optional worker identifier to associate the claimed attempt with.

    Returns:
        The next attempt to execute, or `None` when no eligible rollouts are queued.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement queue retrieval.
    """
    raise NotImplementedError()
315
+
316
async def dequeue_many_rollouts(
    self,
    *,
    limit: int = 1,
    worker_id: Optional[str] = None,
) -> Sequence[AttemptedRollout]:
    """Claim at most `limit` queued rollouts without blocking.

    An implementation may repeatedly invoke
    [`dequeue_rollout()`][mantisdk.LightningStore.dequeue_rollout] until the
    requested limit is reached or the queue is empty. Subclasses can override
    this method to fetch multiple rollouts atomically.

    Args:
        limit: Maximum number of rollouts to claim. Non-positive values return an empty list.
        worker_id: Optional worker identifier passed through to each dequeue call.

    Returns:
        Attempted rollouts claimed in FIFO order. May contain fewer than `limit`
        entries when the queue is exhausted.

    Raises:
        NotImplementedError: Always, in this base class.
    """
    raise NotImplementedError()
338
+
339
async def start_attempt(self, rollout_id: str, worker_id: Optional[str] = None) -> AttemptedRollout:
    """Create a manual retry attempt for a rollout that already exists.

    Runners typically call this when they want to retry outside of the normal
    queue flow (for example in an online RL setup). Implementations must check
    that the rollout exists, allocate a fresh `attempt_id`, increment the
    `sequence_id` monotonically, stamp the new attempt with `status="preparing"`,
    and return an up-to-date [`AttemptedRollout`][mantisdk.AttemptedRollout].

    Args:
        rollout_id: Unique identifier of the rollout receiving a new attempt.
        worker_id: Optional worker identifier to associate the new attempt with.

    Returns:
        The rollout paired with its newly-created attempt.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement attempt creation.
        ValueError: Implementations must raise when `rollout_id` is unknown.
    """
    raise NotImplementedError()
360
+
361
async def add_many_spans(self, spans: Sequence[Span]) -> Sequence[Span]:
    """Store a batch of pre-constructed spans emitted during rollout execution.

    An implementation may simply delegate to
    [`add_span()`][mantisdk.LightningStore.add_span] for every span. A store that
    supports bulk insertion can implement this method directly to improve
    performance.

    Args:
        spans: Spans to persist.

    Raises:
        NotImplementedError: Always, in this base class.
    """
    raise NotImplementedError()
368
+
369
async def add_span(self, span: Span) -> Optional[Span]:
    """Store one pre-constructed span emitted during rollout execution.

    The given [`Span`][mantisdk.Span] must already carry its `rollout_id`,
    `attempt_id`, and `sequence_id`. Implementations must:

    * Verify that both the rollout and the attempt exist.
    * Keep span ordering strictly increasing per attempt (rejecting or keeping duplicates).
    * Treat the arrival of the span as a heartbeat: update the attempt's
      `last_heartbeat_time` and move both attempt and rollout to `"running"` if
      they were still `"preparing"` or `"requeuing"`.

    Args:
        span: Fully populated span to persist.

    Returns:
        The stored span record (implementations may return a copy), or `None`
        when the span was not added because it is a duplicate.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement span persistence.
        ValueError: Implementations must raise when the referenced rollout or attempt is missing.
    """
    raise NotImplementedError()
393
+
394
async def add_otel_span(
    self,
    rollout_id: str,
    attempt_id: str,
    readable_span: ReadableSpan,
    sequence_id: int | None = None,
) -> Optional[Span]:
    """Convert an OpenTelemetry span and store it for a given attempt.

    Implementations must turn `readable_span` into a [`Span`][mantisdk.Span]
    (typically via [`Span.from_opentelemetry()`][mantisdk.Span.from_opentelemetry]),
    assign a strictly increasing `sequence_id` when none is provided, and persist
    the result with the same semantics as [`add_span()`][mantisdk.LightningStore.add_span].

    Args:
        rollout_id: Identifier of the rollout that produced the span.
        attempt_id: Attempt identifier the span belongs to.
        readable_span: OpenTelemetry span in SDK form.
        sequence_id: Optional explicit ordering hint. When omitted, call
            [`get_next_span_sequence_id()`][mantisdk.LightningStore.get_next_span_sequence_id]
            automatically.

    Returns:
        The stored span record, or `None` when the span was not added because it
        is a duplicate.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement span persistence.
        ValueError: Implementations must raise when the rollout or attempt is unknown.
    """
    raise NotImplementedError()
424
+
425
async def query_rollouts(
    self,
    *,
    status_in: Optional[Sequence[RolloutStatus]] = None,
    rollout_id_in: Optional[Sequence[str]] = None,
    rollout_id_contains: Optional[str] = None,
    filter_logic: Literal["and", "or"] = "and",
    sort_by: Optional[str] = None,
    sort_order: Literal["asc", "desc"] = "asc",
    limit: int = -1,
    offset: int = 0,
    # Deprecated fields
    status: Optional[Sequence[RolloutStatus]] = None,
    rollout_ids: Optional[Sequence[str]] = None,
) -> Sequence[Rollout]:
    """Look up rollouts by status and/or explicit identifiers.

    Structured filtering, sorting, and pagination are supported so that callers
    can build simple dashboards without copying data out of the store. The
    legacy parameters `status` and `rollout_ids` are still accepted and act as
    aliases for `status_in` and `rollout_id_in` respectively; when both the new
    and the deprecated parameter are supplied, the new one takes precedence.

    Args:
        status_in: Optional whitelist of [`RolloutStatus`][mantisdk.RolloutStatus] values.
        rollout_id_in: Optional whitelist of rollout identifiers to include.
        rollout_id_contains: Optional substring match for rollout identifiers.
        filter_logic: Logical operator to combine filters.
        sort_by: Optional field to sort by. Must reference a numeric or string
            field on [`Rollout`][mantisdk.Rollout].
        sort_order: Direction to sort when `sort_by` is provided.
        limit: Maximum number of rows to return. Use `-1` for "no limit".
        offset: Number of rows to skip before returning results.
        status: Deprecated field. Use `status_in` instead.
        rollout_ids: Deprecated field. Use `rollout_id_in` instead.

    Returns:
        A sequence of matching rollouts (or [`AttemptedRollout`][mantisdk.AttemptedRollout]
        when attempts exist). Ordering is deterministic when `sort_by` is set.
        The return value is not guaranteed to be a list.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement the query.
    """
    raise NotImplementedError()
471
+
472
async def query_attempts(
    self,
    rollout_id: str,
    *,
    sort_by: Optional[str] = "sequence_id",
    sort_order: Literal["asc", "desc"] = "asc",
    limit: int = -1,
    offset: int = 0,
) -> Sequence[Attempt]:
    """Return every attempt ever created for `rollout_id`, by default in ascending sequence order.

    The sorting and pagination parameters let callers re-order or page through
    the attempts so that large retry histories can be streamed lazily.

    Args:
        rollout_id: Identifier of the rollout being inspected.
        sort_by: Field to sort by. Must be a numeric or string field of
            [`Attempt`][mantisdk.Attempt]. Defaults to `sequence_id` (oldest first).
        sort_order: Order to sort by.
        limit: Limit on the number of results. `-1` for unlimited.
        offset: Offset into the results.

    Returns:
        Sequence of Attempts. Returns an empty sequence when none exist.
        The return value is not guaranteed to be a list.

    Raises:
        NotImplementedError: Always, in this base class; subclasses must implement the query.
        ValueError: Implementations must raise when the rollout does not exist.
    """
    raise NotImplementedError()
503
+
504
+ async def get_rollout_by_id(self, rollout_id: str) -> Optional[Rollout]:
505
+ """Fetch a rollout by identifier without mutating its state.
506
+
507
+ Args:
508
+ rollout_id: Identifier to retrieve.
509
+
510
+ Returns:
511
+ The rollout when found, otherwise `None`.
512
+
513
+ Raises:
514
+ NotImplementedError: Subclasses must implement retrieval.
515
+ """
516
+ raise NotImplementedError()
517
+
518
+ async def get_latest_attempt(self, rollout_id: str) -> Optional[Attempt]:
519
+ """Fetch the attempt with the highest `sequence_id` for `rollout_id`.
520
+
521
+ Args:
522
+ rollout_id: Identifier to inspect.
523
+
524
+ Returns:
525
+ The most recent attempt or `None` when no attempts exist yet.
526
+
527
+ Raises:
528
+ NotImplementedError: Subclasses must implement retrieval.
529
+ ValueError: Implementations must raise when the rollout does not exist.
530
+ """
531
+ raise NotImplementedError()
532
+
533
+ async def query_resources(
534
+ self,
535
+ *,
536
+ resources_id: Optional[str] = None,
537
+ resources_id_contains: Optional[str] = None,
538
+ # `filter_logic` is intentionally not supported here; there is no known need for it.
539
+ sort_by: Optional[str] = None,
540
+ sort_order: Literal["asc", "desc"] = "asc",
541
+ limit: int = -1,
542
+ offset: int = 0,
543
+ ) -> Sequence[ResourcesUpdate]:
544
+ """List every stored resource snapshot.
545
+
546
+ Supports lightweight filtering, sorting, and pagination for embedding in
547
+ dashboards.
548
+
549
+ Args:
550
+ resources_id: Optional identifier of the resources to include.
551
+ resources_id_contains: Optional substring match for resources identifiers.
552
+ sort_by: Optional field to sort by (must be numeric or string on
553
+ [`ResourcesUpdate`][mantisdk.ResourcesUpdate]).
554
+ sort_order: Order to sort by.
555
+ limit: Limit on the number of results. `-1` for unlimited.
556
+ offset: Offset into the results.
557
+
558
+ Returns:
559
+ [`ResourcesUpdate`][mantisdk.ResourcesUpdate] objects.
560
+ By default, resources are sorted in a deterministic but undefined order.
561
+ The return value is not guaranteed to be a list.
562
+
563
+ Raises:
564
+ NotImplementedError: Subclasses must implement retrieval.
565
+ """
566
+ raise NotImplementedError()
567
+
568
+ async def get_resources_by_id(self, resources_id: str) -> Optional[ResourcesUpdate]:
569
+ """Return a specific named resource snapshot by identifier.
570
+
571
+ Args:
572
+ resources_id: Identifier of the snapshot.
573
+
574
+ Returns:
575
+ The stored [`ResourcesUpdate`][mantisdk.ResourcesUpdate], or `None` when missing.
576
+
577
+ Raises:
578
+ NotImplementedError: Subclasses must implement retrieval.
579
+ """
580
+ raise NotImplementedError()
581
+
582
+ async def get_latest_resources(self) -> Optional[ResourcesUpdate]:
583
+ """Fetch the latest resource snapshot marked as the global default.
584
+
585
+ Returns:
586
+ The current latest [`ResourcesUpdate`][mantisdk.ResourcesUpdate], or `None` when
587
+ no resources have been registered yet.
588
+
589
+ Raises:
590
+ NotImplementedError: Subclasses must implement retrieval.
591
+ """
592
+ raise NotImplementedError()
593
+
594
+ async def get_next_span_sequence_id(self, rollout_id: str, attempt_id: str) -> int:
595
+ """Allocate the next strictly increasing sequence number used to order spans.
596
+
597
+ Implementations must retain counters so repeated calls return `1, 2, ...` without
598
+ gaps unless spans were explicitly inserted with a custom `sequence_id`. The
599
+ counter may be scoped per rollout or per attempt, but the sequence must be
600
+ strictly increasing for spans emitted by the specified attempt so traces remain
601
+ totally ordered.
602
+
603
+ See [Distributed Tracing][distributed-tracing] for detailed motivations.
604
+
605
+ Args:
606
+ rollout_id: Identifier of the rollout emitting spans.
607
+ attempt_id: Attempt identifier for the upcoming span.
608
+
609
+ Returns:
610
+ The next integer sequence identifier, unique within the attempt.
611
+
612
+ Raises:
613
+ NotImplementedError: Subclasses must provide the allocator.
614
+ ValueError: Implementations must raise when the rollout or attempt does not exist.
615
+ """
616
+ raise NotImplementedError()
617
+
618
+ async def get_many_span_sequence_ids(self, rollout_attempt_ids: Sequence[Tuple[str, str]]) -> Sequence[int]:
619
+ """Bulk allocate the next strictly increasing sequence number used to order spans.
620
+
621
+ Implementations may delegate to [`get_next_span_sequence_id()`][mantisdk.LightningStore.get_next_span_sequence_id]
622
+ for each rollout and attempt.
623
+
624
+ Args:
625
+ rollout_attempt_ids: Sequence of `(rollout_id, attempt_id)` tuples.
626
+
627
+ Returns:
628
+ List of sequence numbers.
629
+ """
630
+ raise NotImplementedError()
631
+
632
+ async def wait_for_rollouts(self, *, rollout_ids: List[str], timeout: Optional[float] = None) -> List[Rollout]:
633
+ """Block until the targeted rollouts reach a terminal status or the timeout expires.
634
+
635
+ Terminal statuses are `"succeeded"`, `"failed"`, and `"cancelled"`. When the timeout
636
+ elapses, implementations should return the subset of rollouts that are already terminal
637
+ and omit the rest.
638
+
639
+ !!! warning
640
+ It's dangerous and might be event-loop blocking to call this function
641
+ with a long timeout. It's a good idea to poll this method to check
641
+ whether newly completed rollouts are coming in. Be careful in implementing the sleep logic
643
+ to avoid busy-waiting.
644
+
645
+ Args:
646
+ rollout_ids: Identifiers of rollouts to watch.
647
+ timeout: Maximum time in seconds to wait. `None` waits indefinitely.
648
+
649
+ Returns:
650
+ Rollouts that finished before the deadline, in arbitrary order.
651
+
652
+ Raises:
653
+ NotImplementedError: Subclasses must implement waiting semantics.
654
+ ValueError: Implementations must raise when a rollout identifier is unknown.
655
+ """
656
+ raise NotImplementedError()
657
+
658
+ async def query_spans(
659
+ self,
660
+ rollout_id: str,
661
+ attempt_id: str | Literal["latest"] | None = None,
662
+ *,
663
+ # Filtering
664
+ trace_id: Optional[str] = None,
665
+ trace_id_contains: Optional[str] = None,
666
+ span_id: Optional[str] = None,
667
+ span_id_contains: Optional[str] = None,
668
+ parent_id: Optional[str] = None,
669
+ parent_id_contains: Optional[str] = None,
670
+ name: Optional[str] = None,
671
+ name_contains: Optional[str] = None,
672
+ filter_logic: Literal["and", "or"] = "and",
673
+ # Pagination
674
+ limit: int = -1,
675
+ offset: int = 0,
676
+ # Sorting
677
+ sort_by: Optional[str] = "sequence_id",
678
+ sort_order: Literal["asc", "desc"] = "asc",
679
+ ) -> Sequence[Span]:
680
+ """Return the stored spans for a rollout, optionally scoped to one attempt.
681
+
682
+ Supports a handful of filters that cover the most common debugging
683
+ scenarios (matching `trace_id`/`span_id`/`parent_id` or substring
684
+ matches on the span name). `attempt_id="latest"` acts as a convenience
685
+ that resolves the most recent attempt before evaluating filters. When
686
+ `attempt_id=None`, spans across every attempt are eligible. By default
687
+ results are sorted by `sequence_id` (oldest first). Implementations may
688
+ raise a `RuntimeError` when spans were evicted or expired.
689
+
690
+ Args:
691
+ rollout_id: Identifier of the rollout being inspected.
692
+ attempt_id: Attempt identifier to filter by. Pass `"latest"` to retrieve only the
693
+ most recent attempt, or `None` to return all spans across attempts.
694
+ trace_id: Optional trace ID to filter by.
695
+ trace_id_contains: Optional substring match for trace IDs.
696
+ span_id: Optional span ID to filter by.
697
+ span_id_contains: Optional substring match for span IDs.
698
+ parent_id: Optional parent span ID to filter by.
699
+ parent_id_contains: Optional substring match for parent span IDs.
700
+ name: Optional span name to filter by.
701
+ name_contains: Optional substring match for span names.
702
+ filter_logic: Logical operator to combine the optional filters above.
703
+ The `rollout_id` argument is always applied with AND semantics.
704
+ limit: Limit on the number of results. `-1` for unlimited.
705
+ offset: Offset into the results.
706
+ sort_by: Field to sort by. Must be a numeric or string field of
707
+ [`Span`][mantisdk.Span].
708
+ sort_order: Order to sort by.
709
+
710
+ Returns:
711
+ An ordered sequence of spans (possibly empty).
712
+ The return value is not guaranteed to be a list.
713
+
714
+ Raises:
715
+ NotImplementedError: Subclasses must implement the query.
716
+ ValueError: Implementations must raise when the rollout or attempt is unknown.
717
+ """
718
+ raise NotImplementedError()
719
+
720
+ async def add_resources(self, resources: NamedResources) -> ResourcesUpdate:
721
+ """Persist a new immutable snapshot of named resources and mark it as latest.
722
+
723
+ Implementations must assign a fresh `resources_id` and ensure subsequent calls to
724
+ [`get_latest_resources()`][mantisdk.LightningStore.get_latest_resources] return the
725
+ snapshot produced here.
726
+
727
+ Args:
728
+ resources: Mapping of resource names to their serialized payloads.
729
+
730
+ Returns:
731
+ The stored [`ResourcesUpdate`][mantisdk.ResourcesUpdate] including its generated id.
732
+
733
+ Raises:
734
+ NotImplementedError: Subclasses must implement resource persistence.
735
+ """
736
+ raise NotImplementedError()
737
+
738
+ async def update_resources(self, resources_id: str, resources: NamedResources) -> ResourcesUpdate:
739
+ """Overwrite or extend an existing resource snapshot and mark it as latest.
740
+
741
+ This API is typically used by algorithms that maintain mutable resources (e.g., model
742
+ checkpoints) under a stable identifier.
743
+
744
+ Args:
745
+ resources_id: Identifier of the snapshot to replace.
746
+ resources: Updated mapping of resource names to payloads.
747
+
748
+ Returns:
749
+ The persisted [`ResourcesUpdate`][mantisdk.ResourcesUpdate].
750
+
751
+ Raises:
752
+ NotImplementedError: Subclasses must implement resource persistence.
753
+ ValueError: Implementations must raise when `resources_id` does not exist.
754
+ """
755
+ raise NotImplementedError()
756
+
757
+ async def update_rollout(
758
+ self,
759
+ rollout_id: str,
760
+ input: TaskInput | Unset = UNSET,
761
+ mode: Optional[Literal["train", "val", "test"]] | Unset = UNSET,
762
+ resources_id: Optional[str] | Unset = UNSET,
763
+ status: RolloutStatus | Unset = UNSET,
764
+ config: RolloutConfig | Unset = UNSET,
765
+ metadata: Optional[Dict[str, Any]] | Unset = UNSET,
766
+ ) -> Rollout:
767
+ """Update rollout metadata and, when provided, drive status transitions.
768
+
769
+ Parameters default to the sentinel [`UNSET`][mantisdk.store.base.UNSET] to
770
+ distinguish omitted fields from explicit `None` assignments. Implementations must:
771
+
772
+ * Validate the rollout exists before mutating it.
773
+ * Replace each property when a concrete value (including `None`) is supplied.
774
+ * When the status switches into a terminal state, set `end_time` and signal any waiters.
775
+ * When the status re-enters a queueing state, ensure the rollout is enqueued exactly once.
776
+
777
+ Args:
778
+ rollout_id: Identifier of the rollout to update.
779
+ input: Replacement task payload; pass `None` to explicitly clear the input.
780
+ mode: Replacement rollout mode.
781
+ resources_id: Replacement resources snapshot reference.
782
+ status: Target rollout status.
783
+ config: Replacement retry/timeout configuration.
784
+ metadata: Replacement metadata dictionary.
785
+
786
+ Returns:
787
+ The updated rollout record.
788
+
789
+ Raises:
790
+ NotImplementedError: Subclasses must implement mutation logic.
791
+ ValueError: Implementations must raise when the rollout is unknown or the update is invalid.
792
+ """
793
+ raise NotImplementedError()
794
+
795
+ async def update_attempt(
796
+ self,
797
+ rollout_id: str,
798
+ attempt_id: str | Literal["latest"],
799
+ status: AttemptStatus | Unset = UNSET,
800
+ worker_id: str | Unset = UNSET,
801
+ last_heartbeat_time: float | Unset = UNSET,
802
+ metadata: Optional[Dict[str, Any]] | Unset = UNSET,
803
+ ) -> Attempt:
804
+ """Update attempt bookkeeping such as status, worker ownership, and heartbeats.
805
+
806
+ When `attempt_id` is `"latest"` the update must target the attempt with the highest
807
+ `sequence_id`; otherwise it must target the specific attempt. Implementations should
808
+ propagate status changes to the rollout (for example
809
+ via [`rollout_status_from_attempt()`][mantisdk.store.utils.rollout_status_from_attempt])
810
+ once the latest attempt transitions to a terminal state.
811
+
812
+ Similar to [`update_rollout()`][mantisdk.LightningStore.update_rollout],
813
+ parameters also default to the sentinel [`UNSET`][mantisdk.store.base.UNSET].
814
+
815
+ If `worker_id` is present, the worker status will be updated following the rules:
816
+
817
+ 1. If attempt status is "succeeded" or "failed", the corresponding worker status will be set to "idle".
818
+ 2. If attempt status is "unresponsive" or "timeout", the corresponding worker status will be set to "unknown".
819
+ 3. Otherwise, the worker status will be set to "busy".
820
+
821
+ Args:
822
+ rollout_id: Identifier of the rollout whose attempt will be updated.
823
+ attempt_id: Attempt identifier or `"latest"` as a convenience.
824
+ status: Replacement attempt status. Terminal statuses must set `end_time`.
825
+ worker_id: Identifier for the worker currently processing the attempt.
826
+ last_heartbeat_time: Wall-clock timestamp (seconds) of the latest heartbeat/span.
827
+ metadata: Replacement metadata dictionary.
828
+
829
+ Returns:
830
+ The updated attempt record.
831
+
832
+ Raises:
833
+ NotImplementedError: Subclasses must implement mutation logic.
834
+ ValueError: Implementations must raise when the rollout or attempt is unknown.
835
+ """
836
+ raise NotImplementedError()
837
+
838
+ async def query_workers(
839
+ self,
840
+ *,
841
+ status_in: Optional[Sequence[WorkerStatus]] = None,
842
+ worker_id_contains: Optional[str] = None,
843
+ filter_logic: Literal["and", "or"] = "and",
844
+ sort_by: Optional[str] = None,
845
+ sort_order: Literal["asc", "desc"] = "asc",
846
+ limit: int = -1,
847
+ offset: int = 0,
848
+ ) -> Sequence[Worker]:
849
+ """Query all workers in the system.
850
+
851
+ Args:
852
+ status_in: Optional whitelist of [`WorkerStatus`][mantisdk.WorkerStatus] values.
853
+ worker_id_contains: Optional substring match for worker identifiers.
854
+ filter_logic: Logical operator to combine the optional filters above.
855
+ sort_by: Field to sort by. Must be a numeric or string field of [`Worker`][mantisdk.Worker].
856
+ sort_order: Order to sort by.
857
+ limit: Limit on the number of results. `-1` for unlimited.
858
+ offset: Offset into the results.
859
+
860
+ Returns:
861
+ Sequence of Workers. Returns an empty sequence when none exist.
862
+ The return value is not guaranteed to be a list.
863
+ """
864
+ raise NotImplementedError()
865
+
866
+ async def get_worker_by_id(self, worker_id: str) -> Optional[Worker]:
867
+ """Retrieve a single worker by identifier.
868
+
869
+ Args:
870
+ worker_id: Identifier of the worker.
871
+
872
+ Returns:
873
+ The worker record if it exists, otherwise `None`.
874
+
875
+ Raises:
876
+ NotImplementedError: Subclasses must implement lookup semantics.
877
+ """
878
+ raise NotImplementedError()
879
+
880
+ async def update_worker(
881
+ self,
882
+ worker_id: str,
883
+ heartbeat_stats: Dict[str, Any] | Unset = UNSET,
884
+ ) -> Worker:
885
+ """Record a heartbeat for `worker_id` and refresh telemetry.
886
+
887
+ Implementations must treat this API as heartbeat-only: it should snapshot
888
+ the latest stats when provided, stamp `last_heartbeat_time` with the
889
+ current wall clock, and rely on other store mutations (`dequeue_rollout`,
890
+ `update_attempt`, etc.) to drive the worker's busy/idle status,
891
+ assignment, and activity timestamps.
892
+
893
+ Args:
894
+ worker_id: Identifier of the worker to update.
895
+ heartbeat_stats: Replacement worker heartbeat statistics (non-null when provided).
896
+ """
897
+ raise NotImplementedError()