mantisdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +22 -0
- mantisdk/adapter/__init__.py +15 -0
- mantisdk/adapter/base.py +94 -0
- mantisdk/adapter/messages.py +270 -0
- mantisdk/adapter/triplet.py +1028 -0
- mantisdk/algorithm/__init__.py +39 -0
- mantisdk/algorithm/apo/__init__.py +5 -0
- mantisdk/algorithm/apo/apo.py +889 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
- mantisdk/algorithm/base.py +162 -0
- mantisdk/algorithm/decorator.py +264 -0
- mantisdk/algorithm/fast.py +250 -0
- mantisdk/algorithm/gepa/__init__.py +59 -0
- mantisdk/algorithm/gepa/adapter.py +459 -0
- mantisdk/algorithm/gepa/gepa.py +364 -0
- mantisdk/algorithm/gepa/lib/__init__.py +18 -0
- mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
- mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
- mantisdk/algorithm/gepa/lib/api.py +375 -0
- mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
- mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
- mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
- mantisdk/algorithm/gepa/lib/core/result.py +233 -0
- mantisdk/algorithm/gepa/lib/core/state.py +636 -0
- mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
- mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
- mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
- mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
- mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
- mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
- mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
- mantisdk/algorithm/gepa/lib/py.typed +0 -0
- mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
- mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
- mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
- mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
- mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
- mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
- mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
- mantisdk/algorithm/gepa/tracing.py +105 -0
- mantisdk/algorithm/utils.py +177 -0
- mantisdk/algorithm/verl/__init__.py +5 -0
- mantisdk/algorithm/verl/interface.py +202 -0
- mantisdk/cli/__init__.py +56 -0
- mantisdk/cli/prometheus.py +115 -0
- mantisdk/cli/store.py +131 -0
- mantisdk/cli/vllm.py +29 -0
- mantisdk/client.py +408 -0
- mantisdk/config.py +348 -0
- mantisdk/emitter/__init__.py +43 -0
- mantisdk/emitter/annotation.py +370 -0
- mantisdk/emitter/exception.py +54 -0
- mantisdk/emitter/message.py +61 -0
- mantisdk/emitter/object.py +117 -0
- mantisdk/emitter/reward.py +320 -0
- mantisdk/env_var.py +156 -0
- mantisdk/execution/__init__.py +15 -0
- mantisdk/execution/base.py +64 -0
- mantisdk/execution/client_server.py +443 -0
- mantisdk/execution/events.py +69 -0
- mantisdk/execution/inter_process.py +16 -0
- mantisdk/execution/shared_memory.py +282 -0
- mantisdk/instrumentation/__init__.py +119 -0
- mantisdk/instrumentation/agentops.py +314 -0
- mantisdk/instrumentation/agentops_langchain.py +45 -0
- mantisdk/instrumentation/litellm.py +83 -0
- mantisdk/instrumentation/vllm.py +81 -0
- mantisdk/instrumentation/weave.py +500 -0
- mantisdk/litagent/__init__.py +11 -0
- mantisdk/litagent/decorator.py +536 -0
- mantisdk/litagent/litagent.py +252 -0
- mantisdk/llm_proxy.py +1890 -0
- mantisdk/logging.py +370 -0
- mantisdk/reward.py +7 -0
- mantisdk/runner/__init__.py +11 -0
- mantisdk/runner/agent.py +845 -0
- mantisdk/runner/base.py +182 -0
- mantisdk/runner/legacy.py +309 -0
- mantisdk/semconv.py +170 -0
- mantisdk/server.py +401 -0
- mantisdk/store/__init__.py +23 -0
- mantisdk/store/base.py +897 -0
- mantisdk/store/client_server.py +2092 -0
- mantisdk/store/collection/__init__.py +30 -0
- mantisdk/store/collection/base.py +587 -0
- mantisdk/store/collection/memory.py +970 -0
- mantisdk/store/collection/mongo.py +1412 -0
- mantisdk/store/collection_based.py +1823 -0
- mantisdk/store/insight.py +648 -0
- mantisdk/store/listener.py +58 -0
- mantisdk/store/memory.py +396 -0
- mantisdk/store/mongo.py +165 -0
- mantisdk/store/sqlite.py +3 -0
- mantisdk/store/threading.py +357 -0
- mantisdk/store/utils.py +142 -0
- mantisdk/tracer/__init__.py +16 -0
- mantisdk/tracer/agentops.py +242 -0
- mantisdk/tracer/base.py +287 -0
- mantisdk/tracer/dummy.py +106 -0
- mantisdk/tracer/otel.py +555 -0
- mantisdk/tracer/weave.py +677 -0
- mantisdk/trainer/__init__.py +6 -0
- mantisdk/trainer/init_utils.py +263 -0
- mantisdk/trainer/legacy.py +367 -0
- mantisdk/trainer/registry.py +12 -0
- mantisdk/trainer/trainer.py +618 -0
- mantisdk/types/__init__.py +6 -0
- mantisdk/types/core.py +553 -0
- mantisdk/types/resources.py +204 -0
- mantisdk/types/tracer.py +515 -0
- mantisdk/types/tracing.py +218 -0
- mantisdk/utils/__init__.py +1 -0
- mantisdk/utils/id.py +18 -0
- mantisdk/utils/metrics.py +1025 -0
- mantisdk/utils/otel.py +578 -0
- mantisdk/utils/otlp.py +536 -0
- mantisdk/utils/server_launcher.py +1045 -0
- mantisdk/utils/system_snapshot.py +81 -0
- mantisdk/verl/__init__.py +8 -0
- mantisdk/verl/__main__.py +6 -0
- mantisdk/verl/async_server.py +46 -0
- mantisdk/verl/config.yaml +27 -0
- mantisdk/verl/daemon.py +1154 -0
- mantisdk/verl/dataset.py +44 -0
- mantisdk/verl/entrypoint.py +248 -0
- mantisdk/verl/trainer.py +549 -0
- mantisdk-0.1.0.dist-info/METADATA +119 -0
- mantisdk-0.1.0.dist-info/RECORD +190 -0
- mantisdk-0.1.0.dist-info/WHEEL +4 -0
- mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
- mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
"""Helpers for emitting reward spans and integrating with AgentOps telemetry."""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import inspect
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import warnings
|
|
10
|
+
from typing import (
|
|
11
|
+
Any,
|
|
12
|
+
Callable,
|
|
13
|
+
Dict,
|
|
14
|
+
List,
|
|
15
|
+
Literal,
|
|
16
|
+
Optional,
|
|
17
|
+
Sequence,
|
|
18
|
+
TypedDict,
|
|
19
|
+
TypeVar,
|
|
20
|
+
cast,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from pydantic import TypeAdapter
|
|
24
|
+
|
|
25
|
+
from mantisdk.semconv import AGL_ANNOTATION, LightningSpanAttributes, RewardPydanticModel
|
|
26
|
+
from mantisdk.types import SpanCoreFields, SpanLike
|
|
27
|
+
from mantisdk.utils.otel import filter_and_unflatten_attributes
|
|
28
|
+
|
|
29
|
+
# NOTE: emit_annotation is imported lazily in emit_reward() to avoid circular import
|
|
30
|
+
# The cycle is: emitter/__init__ → annotation → tracer → store → insight → reward → annotation
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"reward",
|
|
36
|
+
"emit_reward",
|
|
37
|
+
"get_reward_value",
|
|
38
|
+
"get_rewards_from_span",
|
|
39
|
+
"is_reward_span",
|
|
40
|
+
"find_reward_spans",
|
|
41
|
+
"find_final_reward",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RewardDimension(TypedDict):
    """One named component of a (possibly multi-dimensional) reward.

    `emit_reward` builds a list of these entries, placing the primary
    dimension first.
    """

    # Label identifying the dimension (for example "primary" or a dict key).
    name: str
    # Numeric score recorded for this dimension.
    value: float
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _RewardSpanData(TypedDict):
    """Payload returned by the AgentOps-traced reward operation.

    The `reward` decorator wraps the reward function's result in this
    shape, and `get_reward_value` recognises it by its ``type`` field.
    """

    # Discriminator marking the payload as a reward.
    type: Literal["reward"]
    # The reward itself, or ``None`` when no usable number was produced.
    value: Optional[float]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
_FnType = TypeVar("_FnType", bound=Callable[..., Any])
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _agentops_initialized() -> bool:
    """Report whether the AgentOps client is flagged as initialized.

    AgentOps is imported inside the function, so the import only happens
    when the check is actually performed.
    """
    from agentops import get_client

    client = get_client()
    return client.initialized
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def reward(fn: _FnType) -> _FnType:
    """Decorate a reward function so its outputs are tracked as spans.

    When the AgentOps client is initialized, the call is traced as an
    AgentOps operation whose output is a ``{"type": "reward", "value": ...}``
    payload. Otherwise the result is forwarded to `emit_reward`. Both
    synchronous and asynchronous functions are supported; the kind is
    detected once, at decoration time.

    Deprecated:
        This decorator is deprecated. Use [`emit_reward`][mantisdk.emit_reward] instead.

    Args:
        fn: Callable that produces a numeric reward.

    Returns:
        Wrapped callable that preserves the original signature and metadata
        (name, docstring, ``__wrapped__``) and returns whatever `fn` returns.
    """
    # Local import: only this block needs functools.
    import functools

    from agentops.sdk.decorators import operation

    def wrap_result(result: Optional[float]) -> _RewardSpanData:
        """Normalize the reward value into the span payload format."""
        if result is None:
            return {"type": "reward", "value": None}
        if not isinstance(result, (float, int)):  # type: ignore
            # Non-numeric results are recorded as "no reward" rather than raising.
            warnings.warn(f"Reward is ignored because it is not a number: {result}")
            return {"type": "reward", "value": None}
        return {"type": "reward", "value": float(result)}

    # Check if the function is async
    is_async = asyncio.iscoroutinefunction(fn) or inspect.iscoroutinefunction(fn)

    if is_async:

        # functools.wraps keeps fn's name, docstring and signature visible on
        # the wrapper, matching the "preserves the original signature" contract.
        @functools.wraps(fn)
        async def wrapper_async(*args: Any, **kwargs: Any) -> Any:
            if not _agentops_initialized():
                # Track the reward without AgentOps
                result = await fn(*args, **kwargs)
                emit_reward(cast(float, result))
                return result

            result: Optional[float] = None

            @operation
            async def agentops_reward_operation() -> _RewardSpanData:
                # The reward function we are interested in tracing.
                # It takes zero inputs and returns a formatted dict; the raw
                # result is handed back to the caller through the closure.
                nonlocal result
                result = await fn(*args, **kwargs)
                return wrap_result(result)

            await agentops_reward_operation()
            return result

        return wrapper_async  # type: ignore

    else:

        @functools.wraps(fn)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if not _agentops_initialized():
                # Track the reward without AgentOps
                result = fn(*args, **kwargs)
                emit_reward(cast(float, result))
                return result

            result: Optional[float] = None

            @operation
            def agentops_reward_operation() -> _RewardSpanData:
                # Same closure hand-off as the async variant.
                nonlocal result
                result = fn(*args, **kwargs)
                return wrap_result(result)

            agentops_reward_operation()
            return result

        return wrapper  # type: ignore
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def emit_reward(
    reward: float | Dict[str, Any],
    *,
    primary_key: str | None = None,
    attributes: Dict[str, Any] | None = None,
    propagate: bool = True,
) -> SpanCoreFields:
    """Record a reward by emitting it as an annotation span.

    The reward is normalized into an ordered list of dimensions whose first
    entry is the primary one, then handed to `emit_annotation`.

    Examples:
        Emit a single-dimensional reward:
        >>> emit_reward(1.0)

        Emit multi-dimensional rewards:
        >>> emit_reward({"task_completion": 1.0, "efficiency": 0.8}, primary_key="task_completion")

        Emit a reward with additional attributes (for example linking to another response span):
        >>> from mantisdk.utils.otel import make_link_attributes
        >>> emit_reward(0.5, attributes=make_link_attributes({"gen_ai.response.id": "response-123"}))

        Or adding tags onto the reward span:
        >>> from mantisdk.utils.otel import make_tag_attributes
        >>> emit_reward(0.7, attributes=make_tag_attributes(["fast", "reliable"]))

    Args:
        reward: Numeric reward to record. Integers and booleans are converted
            to floats. Pass a dictionary of name -> number to record a
            multi-dimensional reward.
        primary_key: Key of the dictionary entry to treat as the primary
            reward. Required (and must be present in the dictionary) when
            `reward` is a dict; not consulted for scalar rewards.
        attributes: Extra span attributes merged on top of the reward attribute.
        propagate: Forwarded unchanged to `emit_annotation`.

    Returns:
        The value returned by `emit_annotation` for the emitted span.

    Raises:
        TypeError: If a scalar `reward` is not a number.
        ValueError: If a dict value is not a number, `primary_key` is missing,
            or `primary_key` is not a key of the dict.
    """
    # Lazy import to avoid circular dependency
    from .annotation import emit_annotation

    logger.debug(f"Emitting reward: {reward}")

    dimensions: List[RewardDimension]
    if not isinstance(reward, dict):
        # Scalar reward: a single dimension named "primary".
        if isinstance(reward, (int, bool)):
            scalar = float(reward)
        elif isinstance(reward, float):  # pyright: ignore[reportUnnecessaryIsInstance]
            scalar = reward
        else:
            raise TypeError(f"Reward must be a number, got: {type(reward)}")
        dimensions = [RewardDimension(name="primary", value=scalar)]
    else:
        # Validate and coerce every entry before looking at primary_key.
        normalized: Dict[str, float] = {}
        for key, raw in reward.items():
            if isinstance(raw, (int, bool)):
                normalized[key] = float(raw)
            elif isinstance(raw, float):
                normalized[key] = raw
            else:
                raise ValueError(f"Reward value must be a number, got: {type(raw)} for key {key}")

        if primary_key is None:
            raise ValueError("When emitting a multi-dimensional reward as a dict, primary_key must be provided.")
        if primary_key not in normalized:
            raise ValueError(f"Primary key '{primary_key}' not found in reward dict keys: {list(normalized.keys())}")

        # Primary dimension goes first; the rest keep their dict order.
        dimensions = [RewardDimension(name=primary_key, value=normalized[primary_key])]
        dimensions.extend(
            RewardDimension(name=key, value=score) for key, score in normalized.items() if key != primary_key
        )

    span_attributes: Dict[str, Any] = {LightningSpanAttributes.REWARD.value: dimensions}
    span_attributes.update(attributes or {})
    return emit_annotation(span_attributes, propagate=propagate)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def get_reward_value(span: SpanLike) -> Optional[float]:
    """Extract the reward value from a span, if available.

    Three encodings are tried in order:

    1. The current format, read through `get_rewards_from_span`; the first
       dimension in the list is the primary reward.
    2. The AgentOps output attributes (``agentops.task.output`` /
       ``agentops.entity.output``) holding a ``{"type": "reward", ...}``
       payload, either as a dict or as a JSON string.
    3. The legacy ``reward`` attribute on an annotation span.

    Args:
        span: Span object produced by AgentOps or Mantisdk emitters.

    Returns:
        The primary reward encoded in the span or `None` when the span does not represent a reward.
    """
    # v0.3+ emit reward format
    reward_list = get_rewards_from_span(span)
    if reward_list:
        # Reward list is ordered and the first element is the primary reward
        return reward_list[0].value

    for key in [
        "agentops.task.output",  # newer versions of agentops
        "agentops.entity.output",
    ]:
        reward_dict: Dict[str, Any] | None = None
        if span.attributes:
            output = span.attributes.get(key)
            if output:
                if isinstance(output, dict):
                    reward_dict = cast(Dict[str, Any], output)
                elif isinstance(output, str):
                    try:
                        parsed = json.loads(output)
                    except json.JSONDecodeError:
                        parsed = None
                    # A JSON string may decode to a list, string or number;
                    # only a JSON object can be a reward payload. Anything
                    # else is ignored instead of failing on `.get` below.
                    if isinstance(parsed, dict):
                        reward_dict = cast(Dict[str, Any], parsed)

        if reward_dict and reward_dict.get("type") == "reward":
            reward_value = reward_dict.get("value", None)
            if reward_value is None:
                return None
            if not isinstance(reward_value, float):
                logger.error(f"Reward is not a number, got: {type(reward_value)}. This may cause undefined behaviors.")
            logger.warning(
                f"Extracted reward {reward_value} from AgentOps. This format is deprecated, please migrate to using `emit_reward`."
            )
            return cast(float, reward_value)

    # v0.2 emit reward format
    if span.name == AGL_ANNOTATION and span.attributes:
        reward_value = span.attributes.get("reward", None)
        if reward_value is None:
            return None
        if not isinstance(reward_value, float):
            logger.error(f"Reward is not a number, got: {type(reward_value)}. This may cause undefined behaviors.")
        logger.warning(
            f"Extracted reward {reward_value} from a legacy version of reward span. You might have inconsistent mantisdk versions."
        )
        return cast(float, reward_value)

    return None
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def get_rewards_from_span(span: SpanLike) -> List[RewardPydanticModel]:
    """Recover the list of reward dimensions stored on a span.

    Args:
        span: Span object produced by AgentOps or Mantisdk emitters.

    Returns:
        The reward dimensions validated as `RewardPydanticModel` instances, or
        an empty list when the span has no attributes or none of its attribute
        keys start with the reward attribute prefix.
    """
    reward_prefix = LightningSpanAttributes.REWARD.value
    span_attrs = span.attributes

    # Nothing to decode unless at least one attribute key carries the prefix.
    if not span_attrs:
        return []
    if not any(attr_name.startswith(reward_prefix) for attr_name in span_attrs):
        return []

    unflattened = filter_and_unflatten_attributes(cast(Any, span_attrs), reward_prefix)
    return TypeAdapter(List[RewardPydanticModel]).validate_python(unflattened)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def is_reward_span(span: SpanLike) -> bool:
    """Tell whether `get_reward_value` yields a reward for the given span."""
    return get_reward_value(span) is not None
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def find_reward_spans(spans: Sequence[SpanLike]) -> List[SpanLike]:
    """Select the spans that carry a reward, keeping their original order.

    Args:
        spans: Sequence containing [`ReadableSpan`](https://opentelemetry.io/docs/concepts/signals/traces/) objects or mocked span-like values.

    Returns:
        A new list with every span for which `is_reward_span` is true.
    """
    return list(filter(is_reward_span, spans))
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def find_final_reward(spans: Sequence[SpanLike]) -> Optional[float]:
    """Look up the reward carried by the last reward span in a sequence.

    Args:
        spans: Sequence containing [`ReadableSpan`](https://opentelemetry.io/docs/concepts/signals/traces/) objects or mocked span-like values.

    Returns:
        The reward value of the last span (in sequence order) that yields one,
        or `None` when no span does.
    """
    # Walk from the end and stop at the first span that yields a value.
    newest_first = (get_reward_value(candidate) for candidate in reversed(spans))
    return next((value for value in newest_first if value is not None), None)
|
mantisdk/env_var.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
"""Environment variable management."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import overload
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"LightningEnvVar",
|
|
13
|
+
"resolve_bool_env_var",
|
|
14
|
+
"resolve_int_env_var",
|
|
15
|
+
"resolve_str_env_var",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LightningEnvVar(Enum):
    """Names of the environment variables recognized by Mantisdk.

    Each member's value is the literal environment variable name.
    """

    AGL_EMITTER_DEBUG = "AGL_EMITTER_DEBUG"
    """Turns on debug logging for the emitter."""

    AGL_MANAGED_STORE = "AGL_MANAGED_STORE"
    """When enabled, the [`ExecutionStrategy`][mantisdk.ExecutionStrategy]
    constructs LightningStore wrappers automatically. When disabled, the
    provided `store` is passed directly to the bundles so that callers can
    manage store wrappers manually."""

    AGL_CURRENT_ROLE = "AGL_CURRENT_ROLE"
    """Selects which side(s) run in this process. Used by
    [`ClientServerExecutionStrategy`][mantisdk.ClientServerExecutionStrategy]."""

    AGL_SERVER_HOST = "AGL_SERVER_HOST"
    """Interface the [`LightningStoreServer`][mantisdk.LightningStoreServer]
    binds to when running the algorithm bundle locally."""

    AGL_SERVER_PORT = "AGL_SERVER_PORT"
    """Port the [`LightningStoreServer`][mantisdk.LightningStoreServer] listens on."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Accepted spellings, compared after stripping whitespace and lower-casing.
_TRUTHY_VALUES = {"1", "true", "yes", "on"}
_FALSY_VALUES = {"0", "false", "no", "off"}


@overload
def resolve_bool_env_var(env_var: LightningEnvVar, override: bool, fallback: bool) -> bool: ...


@overload
def resolve_bool_env_var(env_var: LightningEnvVar, *, fallback: bool) -> bool: ...


@overload
def resolve_bool_env_var(
    env_var: LightningEnvVar, override: bool | None = None, fallback: bool | None = None
) -> bool | None: ...


def resolve_bool_env_var(
    env_var: LightningEnvVar, override: bool | None = None, fallback: bool | None = None
) -> bool | None:
    """Determine a boolean setting from an override, the environment, or a default.

    Precedence is: explicit `override`, then the environment variable, then
    `fallback`.

    Args:
        env_var: The environment variable to resolve.
        override: Caller-supplied value; returned as-is when not `None`.
        fallback: Value returned when the environment variable is not set.

    Returns:
        The resolved boolean, or `fallback` (possibly `None`) when neither an
        override nor the environment variable is available.

    Raises:
        ValueError: If the environment variable is set to an unrecognized value.
    """
    if override is not None:
        return override

    raw = os.getenv(env_var.value)
    if raw is None:
        return fallback

    token = raw.strip().lower()
    for accepted, outcome in ((_TRUTHY_VALUES, True), (_FALSY_VALUES, False)):
        if token in accepted:
            return outcome

    raise ValueError(f"{env_var.value} must be one of {_TRUTHY_VALUES} or {_FALSY_VALUES}")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@overload
def resolve_int_env_var(env_var: LightningEnvVar, override: int, fallback: int) -> int: ...


@overload
def resolve_int_env_var(env_var: LightningEnvVar, *, fallback: int) -> int: ...


@overload
def resolve_int_env_var(
    env_var: LightningEnvVar, override: int | None = None, fallback: int | None = None
) -> int | None: ...


def resolve_int_env_var(
    env_var: LightningEnvVar, override: int | None = None, fallback: int | None = None
) -> int | None:
    """Resolve an integer environment variable.

    Precedence is: explicit `override`, then the environment variable, then
    `fallback`.

    Args:
        env_var: The environment variable to resolve.
        override: Optional override supplied by the caller; returned as-is
            when not `None`.
        fallback: Default value if the environment variable is not set.

    Returns:
        The resolved integer, or `fallback` (possibly `None`) when neither an
        override nor the environment variable is available.

    Raises:
        ValueError: If the environment variable is set but cannot be parsed
            by `int()`. The original parse error is attached as the cause.
    """
    if override is not None:
        return override

    env_value = os.getenv(env_var.value)
    if env_value is None:
        return fallback

    try:
        return int(env_value)
    except ValueError as exc:
        # Chain explicitly so the traceback shows the underlying parse failure
        # as the direct cause of this friendlier message.
        raise ValueError(f"{env_var.value} must be an integer") from exc
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@overload
def resolve_str_env_var(env_var: LightningEnvVar, override: str, fallback: str) -> str: ...


@overload
def resolve_str_env_var(env_var: LightningEnvVar, *, fallback: str) -> str: ...


@overload
def resolve_str_env_var(
    env_var: LightningEnvVar, override: str | None = None, fallback: str | None = None
) -> str | None: ...


def resolve_str_env_var(
    env_var: LightningEnvVar, override: str | None = None, fallback: str | None = None
) -> str | None:
    """Determine a string setting from an override, the environment, or a default.

    Args:
        env_var: The environment variable to resolve.
        override: Caller-supplied value; returned as-is when not `None`.
        fallback: Value returned when the environment variable is not set.

    Returns:
        `override` when given, otherwise the environment variable's value
        exactly as stored, otherwise `fallback`.
    """
    if override is not None:
        return override

    # os.getenv yields the second argument only when the variable is unset.
    return os.getenv(env_var.value, fallback)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
from .base import ExecutionStrategy
|
|
4
|
+
from .client_server import ClientServerExecutionStrategy
|
|
5
|
+
from .events import ExecutionEvent, MultiprocessingEvent, ThreadingEvent
|
|
6
|
+
from .shared_memory import SharedMemoryExecutionStrategy
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ExecutionStrategy",
|
|
10
|
+
"ClientServerExecutionStrategy",
|
|
11
|
+
"ExecutionEvent",
|
|
12
|
+
"ThreadingEvent",
|
|
13
|
+
"MultiprocessingEvent",
|
|
14
|
+
"SharedMemoryExecutionStrategy",
|
|
15
|
+
]
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
|
|
8
|
+
from mantisdk.store.base import LightningStore
|
|
9
|
+
|
|
10
|
+
from .events import ExecutionEvent
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class AlgorithmBundle(Protocol):
    """Async callable for the algorithm side, produced by [`Trainer`][mantisdk.Trainer].

    Execution strategies treat the returned coroutine as opaque: they only
    hand over the shared store instance and the cooperative stop event.
    A bundle typically encapsulates algorithm setup together with the
    adapter, the LLM proxy, and similar components.
    """

    async def __call__(self, store: LightningStore, event: ExecutionEvent) -> None:
        """Run the algorithm logic against ``store`` until it completes or is stopped."""
        ...
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RunnerBundle(Protocol):
    """Async callable for the runner side: runner setup plus the worker loop.

    This is the counterpart of the
    [`AlgorithmBundle`][mantisdk.AlgorithmBundle].
    """

    async def __call__(self, store: LightningStore, worker_id: int, event: ExecutionEvent) -> None:
        """Run the runner logic for ``worker_id`` with the given ``store`` and ``event``."""
        ...
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ExecutionStrategy:
    """Base class that coordinates an algorithm bundle and runner bundles.

    A strategy decides how many worker bundles to launch, whether the two
    sides talk through shared memory or across an HTTP boundary, and how
    shutdown signals are handled. Strategies deliberately do not inspect
    bundle internals; each bundle stays responsible for its own scheduling
    semantics.

    !!! note
        Implementations must honor the [execute()][mantisdk.ExecutionStrategy.execute]
        contract by propagating `KeyboardInterrupt` and ensuring resources are
        released when an error occurs on either side of the algorithm/runner
        pair.
    """

    def execute(self, algorithm: AlgorithmBundle, runner: RunnerBundle, store: LightningStore) -> None:
        """Run both bundles under this strategy's orchestration model.

        The base implementation is a placeholder and always raises.

        Args:
            algorithm: Callable bundle responsible for algorithm execution.
            runner: Callable bundle for runner workers.
            store: Concrete [`LightningStore`][mantisdk.LightningStore]
                shared across bundles.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must
                provide the orchestration implementation.
        """
        raise NotImplementedError
|