mantisdk-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mantisdk might be problematic.

Files changed (190)
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
mantisdk/algorithm/gepa/lib/utils/stop_condition.py
@@ -0,0 +1,196 @@
+ """
+ Utility functions for graceful stopping of GEPA runs.
+ """
+
+ import os
+ import signal
+ import time
+ from typing import Literal, Protocol, runtime_checkable
+
+ from mantisdk.algorithm.gepa.lib.core.state import GEPAState
+
+
+ @runtime_checkable
+ class StopperProtocol(Protocol):
+     """
+     Protocol for stop condition objects.
+
+     A stopper is a callable object that returns True when the optimization should stop.
+     """
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         """
+         Check if the optimization should stop.
+
+         Args:
+             gepa_state: The current GEPA state containing optimization information
+
+         Returns:
+             True if the optimization should stop, False otherwise.
+         """
+         ...
+
+
+ class TimeoutStopCondition(StopperProtocol):
+     """Stop callback that stops after a specified timeout."""
+
+     def __init__(self, timeout_seconds: float):
+         self.timeout_seconds = timeout_seconds
+         self.start_time = time.time()
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if timeout has been reached
+         return time.time() - self.start_time > self.timeout_seconds
+
+
+ class FileStopper(StopperProtocol):
+     """
+     Stop callback that stops when a specific file exists.
+     """
+
+     def __init__(self, stop_file_path: str):
+         self.stop_file_path = stop_file_path
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # returns true if stop file exists
+         return os.path.exists(self.stop_file_path)
+
+     def remove_stop_file(self):
+         # remove the stop file
+         if os.path.exists(self.stop_file_path):
+             os.remove(self.stop_file_path)
+
+
+ class ScoreThresholdStopper(StopperProtocol):
+     """
+     Stop callback that stops when a score threshold is reached.
+     """
+
+     def __init__(self, threshold: float):
+         self.threshold = threshold
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if score threshold is reached
+         try:
+             current_best_score = (
+                 max(gepa_state.program_full_scores_val_set) if gepa_state.program_full_scores_val_set else 0.0
+             )
+             return current_best_score >= self.threshold
+         except Exception:
+             return False
+
+
+ class NoImprovementStopper(StopperProtocol):
+     """
+     Stop callback that stops after a specified number of iterations without improvement.
+     """
+
+     def __init__(self, max_iterations_without_improvement: int):
+         self.max_iterations_without_improvement = max_iterations_without_improvement
+         self.best_score = float("-inf")
+         self.iterations_without_improvement = 0
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if max iterations without improvement reached
+         try:
+             current_score = (
+                 max(gepa_state.program_full_scores_val_set) if gepa_state.program_full_scores_val_set else 0.0
+             )
+             if current_score > self.best_score:
+                 self.best_score = current_score
+                 self.iterations_without_improvement = 0
+             else:
+                 self.iterations_without_improvement += 1
+
+             return self.iterations_without_improvement >= self.max_iterations_without_improvement
+         except Exception:
+             return False
+
+     def reset(self):
+         """Reset the counter (useful when manually improving the score)."""
+         self.iterations_without_improvement = 0
+
+
+ class SignalStopper(StopperProtocol):
+     """Stop callback that stops when a signal is received."""
+
+     def __init__(self, signals=None):
+         self.signals = signals or [signal.SIGINT, signal.SIGTERM]
+         self._stop_requested = False
+         self._original_handlers = {}
+         self._setup_signal_handlers()
+
+     def _setup_signal_handlers(self):
+         """Set up signal handlers for graceful shutdown."""
+
+         def signal_handler(signum, frame):
+             self._stop_requested = True
+
+         # Store original handlers and set new ones
+         for sig in self.signals:
+             try:
+                 self._original_handlers[sig] = signal.signal(sig, signal_handler)
+             except (OSError, ValueError):
+                 # Signal not available on this platform
+                 pass
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if a signal was received
+         return self._stop_requested
+
+     def cleanup(self):
+         """Restore original signal handlers."""
+         for sig, handler in self._original_handlers.items():
+             try:
+                 signal.signal(sig, handler)
+             except (OSError, ValueError):
+                 pass
+
+
+ class MaxTrackedCandidatesStopper(StopperProtocol):
+     """
+     Stop callback that stops after a maximum number of tracked candidates.
+     """
+
+     def __init__(self, max_tracked_candidates: int):
+         self.max_tracked_candidates = max_tracked_candidates
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if max tracked candidates reached
+         return len(gepa_state.program_candidates) >= self.max_tracked_candidates
+
+
+ class MaxMetricCallsStopper(StopperProtocol):
+     """
+     Stop callback that stops after a maximum number of metric calls.
+     """
+
+     def __init__(self, max_metric_calls: int):
+         self.max_metric_calls = max_metric_calls
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if max metric calls reached
+         return gepa_state.total_num_evals >= self.max_metric_calls
+
+
+ class CompositeStopper(StopperProtocol):
+     """
+     Stop callback that combines multiple stopping conditions.
+
+     Allows combining several stoppers and stopping when any or all of them are triggered.
+     """
+
+     def __init__(self, *stoppers: StopperProtocol, mode: Literal["any", "all"] = "any"):
+         self.stoppers = stoppers
+         self.mode = mode
+
+     def __call__(self, gepa_state: GEPAState) -> bool:
+         # return true if stopping condition is met
+         if self.mode == "any":
+             return any(stopper(gepa_state) for stopper in self.stoppers)
+         elif self.mode == "all":
+             return all(stopper(gepa_state) for stopper in self.stoppers)
+         else:
+             raise ValueError(f"Unknown mode: {self.mode}")
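All stoppers in this file share the `StopperProtocol` call signature, so they compose directly through `CompositeStopper`. A minimal sketch of combining them is below; the `FakeState` stand-in and the budget values are illustrative assumptions, not part of this module, and only mimic the `GEPAState` attributes referenced above.

```python
# Illustrative only: composes the stoppers defined in stop_condition.py.
from dataclasses import dataclass, field
from typing import Any, List

from mantisdk.algorithm.gepa.lib.utils.stop_condition import (
    CompositeStopper,
    MaxMetricCallsStopper,
    ScoreThresholdStopper,
    TimeoutStopCondition,
)


@dataclass
class FakeState:
    # Stand-in for GEPAState with just the attributes the stoppers read.
    program_full_scores_val_set: List[float] = field(default_factory=list)
    program_candidates: List[Any] = field(default_factory=list)
    total_num_evals: int = 0


stop = CompositeStopper(
    TimeoutStopCondition(timeout_seconds=3600),    # wall-clock budget
    ScoreThresholdStopper(threshold=0.95),         # good-enough score
    MaxMetricCallsStopper(max_metric_calls=2000),  # evaluation budget
    mode="any",                                    # stop when any condition fires
)

state = FakeState(program_full_scores_val_set=[0.4, 0.7], total_num_evals=120)
print(stop(state))  # False: no budget exhausted, threshold not reached
```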
@@ -0,0 +1,105 @@
1
+ # Copyright (c) Microsoft. All rights reserved.
2
+
3
+ """GEPA-specific tracing context for detailed execution tracking.
4
+
5
+ This module provides a context class that tracks GEPA's execution state
6
+ (generation, phase, candidate, batch) to enable rich tagging of traces.
7
+ """
8
+
9
+ import uuid
10
+ from dataclasses import dataclass, field
11
+ from typing import List, Optional, Set
12
+
13
+
14
+ @dataclass
15
+ class GEPATracingContext:
16
+ """Tracks execution state for detailed tracing in GEPA optimization.
17
+
18
+ This class maintains state about the current phase of GEPA execution,
19
+ generation/iteration number, and batch counts to enable rich tagging
20
+ of traces for filtering and analysis in Mantis.
21
+
22
+ GEPA-specific phases:
23
+ - "train-eval": Evaluating candidates on training data
24
+ - "validation-eval": Evaluating candidates on validation data
25
+ - "reflection": LLM reflection to improve prompts (distinct from validation!)
26
+
27
+ Example:
28
+ >>> ctx = GEPATracingContext()
29
+ >>> ctx.generation
30
+ 0
31
+ >>> ctx.session_id # Auto-generated for grouping traces
32
+ 'gepa-abc123def456'
33
+ >>> ctx.next_generation()
34
+ >>> ctx.generation
35
+ 1
36
+ >>> batch_id = ctx.next_batch()
37
+ >>> batch_id
38
+ 'batch-1'
39
+
40
+ Attributes:
41
+ generation: Current generation/iteration number (0-indexed).
42
+ phase: Current execution phase.
43
+ candidate_id: Short hash of the current candidate being evaluated.
44
+ batch_count: Number of batches processed in current generation.
45
+ training_item_ids: Set of item IDs seen during training (for validation detection).
46
+ session_id: Unique session identifier for grouping all traces in this GEPA run.
47
+ """
48
+
49
+ generation: int = 0
50
+ phase: str = "train-eval"
51
+ candidate_id: Optional[str] = None
52
+ batch_count: int = 0
53
+ training_item_ids: Set[str] = field(default_factory=set)
54
+ session_id: str = field(default_factory=lambda: f"gepa-{uuid.uuid4().hex[:12]}")
55
+
56
+ def next_batch(self) -> str:
57
+ """Increment batch count and return batch identifier.
58
+
59
+ Returns:
60
+ Batch identifier string (e.g., "batch-1").
61
+ """
62
+ self.batch_count += 1
63
+ return f"batch-{self.batch_count}"
64
+
65
+ def set_phase(self, phase: str) -> None:
66
+ """Set the current execution phase.
67
+
68
+ Args:
69
+ phase: Phase name (e.g., "train-eval", "validation-eval", "reflection").
70
+ """
71
+ self.phase = phase
72
+
73
+ def next_generation(self) -> None:
74
+ """Increment generation counter and reset batch count."""
75
+ self.generation += 1
76
+ self.batch_count = 0
77
+
78
+ def set_candidate(self, candidate_id: str) -> None:
79
+ """Set the current candidate identifier.
80
+
81
+ Args:
82
+ candidate_id: Short hash or identifier for the candidate.
83
+ """
84
+ self.candidate_id = candidate_id
85
+
86
+ def register_training_items(self, item_ids: List[str]) -> None:
87
+ """Register item IDs as training data for validation detection.
88
+
89
+ Args:
90
+ item_ids: List of item IDs from the training batch.
91
+ """
92
+ self.training_item_ids.update(item_ids)
93
+
94
+ def is_validation_batch(self, item_ids: List[str]) -> bool:
95
+ """Check if a batch contains validation items (not in training set).
96
+
97
+ Args:
98
+ item_ids: List of item IDs from the batch.
99
+
100
+ Returns:
101
+ True if any item is not in the training set.
102
+ """
103
+ if not self.training_item_ids:
104
+ return False
105
+ return any(item_id not in self.training_item_ids for item_id in item_ids)
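A brief sketch of how the context's methods fit together over one optimization step; the tag dictionary and the "abc123"/"task-*" identifiers are illustrative assumptions, and how GEPA's adapter actually attaches these tags to spans is not shown here.

```python
# Illustrative walkthrough of GEPATracingContext over one GEPA iteration.
from mantisdk.algorithm.gepa.tracing import GEPATracingContext

ctx = GEPATracingContext()

# Training mini-batch for a candidate prompt.
ctx.set_candidate("abc123")
ctx.set_phase("train-eval")
ctx.register_training_items(["task-1", "task-2", "task-3"])
batch_id = ctx.next_batch()  # "batch-1"

# A batch containing unseen items is treated as validation.
if ctx.is_validation_batch(["task-9"]):
    ctx.set_phase("validation-eval")

# Tags a tracer could attach to all spans emitted during this step.
tags = {
    "session": ctx.session_id,   # e.g. "gepa-<12 hex chars>"
    "generation": ctx.generation,
    "phase": ctx.phase,
    "candidate": ctx.candidate_id,
    "batch": batch_id,
}

ctx.next_generation()  # increments generation, resets batch_count
```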
mantisdk/algorithm/utils.py
@@ -0,0 +1,177 @@
+ # Copyright (c) Microsoft. All rights reserved.
+
+ from __future__ import annotations
+
+ import functools
+ import logging
+ import random
+ from collections.abc import Coroutine
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Callable,
+     Concatenate,
+     Iterator,
+     List,
+     Literal,
+     Optional,
+     ParamSpec,
+     Sequence,
+     TypeVar,
+     overload,
+ )
+
+ from mantisdk.types import Dataset
+
+ if TYPE_CHECKING:
+     from mantisdk.llm_proxy import LLMProxy
+     from mantisdk.store.base import LightningStore
+
+     from .base import Algorithm
+
+ T_task = TypeVar("T_task")
+ T_algo = TypeVar("T_algo", bound="Algorithm")
+
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def batch_iter_over_dataset(dataset: Dataset[T_task], batch_size: int) -> Iterator[Sequence[T_task]]:
+     """
+     Create an infinite iterator that yields batches from the dataset.
+
+     When batch_size >= dataset size, yields the entire shuffled dataset repeatedly.
+     When batch_size < dataset size, yields batches of the specified size, reshuffling
+     after each complete pass through the dataset.
+
+     Args:
+         dataset: The dataset to iterate over.
+         batch_size: The desired batch size.
+
+     Yields:
+         Sequences of tasks from the dataset. Each task appears at most once per epoch.
+     """
+     if batch_size >= len(dataset):
+         while True:
+             dataset_copy = [dataset[i] for i in range(len(dataset))]
+             random.shuffle(dataset_copy)
+             yield dataset_copy
+
+     else:
+         current_batch: List[int] = []
+         while True:
+             indices = list(range(len(dataset)))
+             random.shuffle(indices)
+             for index in indices:
+                 if index in current_batch:
+                     continue
+                 current_batch.append(index)
+                 if len(current_batch) == batch_size:
+                     yield [dataset[index] for index in current_batch]
+                     current_batch = []
+
+
+ def with_store(
+     func: Callable[Concatenate[T_algo, LightningStore, P], Coroutine[Any, Any, R]],
+ ) -> Callable[Concatenate[T_algo, P], Coroutine[Any, Any, R]]:
+     """Inject the algorithm's `LightningStore` into coroutine methods.
+
+     The decorator calls `Algorithm.get_store()` once per invocation and passes the
+     resulting store as an explicit argument to the wrapped coroutine. Decorated
+     methods therefore receive the resolved store even when invoked by helper
+     utilities rather than directly by the algorithm.
+
+     Args:
+         func: The coroutine that expects `(self, store, *args, **kwargs)`.
+
+     Returns:
+         A coroutine wrapper that automatically retrieves the store and forwards it
+         to `func`.
+     """
+
+     @functools.wraps(func)
+     async def wrapper(self: T_algo, *args: P.args, **kwargs: P.kwargs) -> R:
+         store = self.get_store()
+         return await func(self, store, *args, **kwargs)
+
+     return wrapper
+
+
+ @overload
+ def with_llm_proxy(
+     required: Literal[False] = False,
+     auto_start: bool = True,
+ ) -> Callable[
+     [Callable[Concatenate[T_algo, Optional[LLMProxy], P], Coroutine[Any, Any, R]]],
+     Callable[Concatenate[T_algo, P], Coroutine[Any, Any, R]],
+ ]: ...
+
+
+ @overload
+ def with_llm_proxy(
+     required: Literal[True],
+     auto_start: bool = True,
+ ) -> Callable[
+     [Callable[Concatenate[T_algo, LLMProxy, P], Coroutine[Any, Any, R]]],
+     Callable[Concatenate[T_algo, P], Coroutine[Any, Any, R]],
+ ]: ...
+
+
+ def with_llm_proxy(
+     required: bool = False,
+     auto_start: bool = True,
+ ) -> Callable[
+     [Callable[..., Coroutine[Any, Any, Any]]],
+     Callable[..., Coroutine[Any, Any, Any]],
+ ]:
+     """Resolve and optionally lifecycle-manage the configured LLM proxy.
+
+     Args:
+         required: When True, raises `ValueError` if the algorithm does not have an
+             [`LLMProxy`][mantisdk.LLMProxy] set. When False, the wrapped coroutine receives
+             `None` if no proxy is available.
+         auto_start: When True, [`LLMProxy.start()`][mantisdk.LLMProxy.start] is invoked if the proxy is not
+             already running before calling `func` and [`LLMProxy.stop()`][mantisdk.LLMProxy.stop] is
+             called afterwards.
+
+     Returns:
+         A decorator that injects the [`LLMProxy`][mantisdk.LLMProxy] (or `None`) as the first
+         argument after `self` and manages automatic startup/shutdown when requested.
+     """
+
+     def decorator(
+         func: Callable[..., Coroutine[Any, Any, Any]],
+     ) -> Callable[..., Coroutine[Any, Any, Any]]:
+         @functools.wraps(func)
+         async def wrapper(self: Algorithm, *args: Any, **kwargs: Any) -> Any:
+             llm_proxy = self.get_llm_proxy()
+
+             if required and llm_proxy is None:
+                 raise ValueError(
+                     "LLM proxy is required but not configured. Call set_llm_proxy() before using this method."
+                 )
+
+             auto_started = False
+             if auto_start and llm_proxy is not None:
+                 if llm_proxy.is_running():
+                     logger.info("Proxy is already running, skipping start")
+                 else:
+                     logger.info("Starting proxy, managed by the algorithm")
+                     await llm_proxy.start()
+                     auto_started = True
+
+             try:
+                 # At type level, overloads guarantee that if `required=True`
+                 # then `func` expects a non-optional LLMProxy.
+                 return await func(self, llm_proxy, *args, **kwargs)
+             finally:
+                 if auto_started and llm_proxy is not None:
+                     logger.info("Stopping proxy, managed by the algorithm")
+                     await llm_proxy.stop()
+
+         return wrapper
+
+     return decorator
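A sketch of how these helpers might be used together inside an `Algorithm` subclass. `MyAlgorithm`, its `optimize` method, and the loop body are hypothetical names introduced here for illustration; the decorator behaviour and import paths follow the code above.

```python
# Hypothetical Algorithm subclass showing the injected-argument pattern.
from typing import Any

from mantisdk.algorithm.base import Algorithm
from mantisdk.algorithm.utils import batch_iter_over_dataset, with_llm_proxy, with_store
from mantisdk.llm_proxy import LLMProxy
from mantisdk.store.base import LightningStore
from mantisdk.types import Dataset


class MyAlgorithm(Algorithm):
    # The inner @with_store wrapper injects `store` immediately after `self`;
    # the outer @with_llm_proxy wrapper injects `llm_proxy` right after it,
    # so callers only pass the dataset and batch size.
    @with_llm_proxy(required=True, auto_start=True)
    @with_store
    async def optimize(
        self,
        store: LightningStore,   # injected by @with_store
        llm_proxy: LLMProxy,     # injected by @with_llm_proxy(required=True)
        train_dataset: Dataset[Any],
        batch_size: int = 8,
    ) -> None:
        for batch in batch_iter_over_dataset(train_dataset, batch_size):
            # Enqueue rollouts via `store` and route model calls through `llm_proxy`.
            ...
            break  # batch_iter_over_dataset is infinite; stop after one batch here
```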
mantisdk/algorithm/verl/__init__.py
@@ -0,0 +1,5 @@
+ # Copyright (c) Microsoft. All rights reserved.
+
+ from .interface import VERL
+
+ __all__ = ["VERL"]
mantisdk/algorithm/verl/interface.py
@@ -0,0 +1,202 @@
+ # Copyright (c) Microsoft. All rights reserved.
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any, Optional, Type
+
+ from hydra import compose, initialize
+ from omegaconf import OmegaConf
+
+ from mantisdk.algorithm.base import Algorithm
+ from mantisdk.client import MantisdkClient
+ from mantisdk.types import Dataset
+ from mantisdk.verl.entrypoint import run_ppo  # type: ignore
+
+ if TYPE_CHECKING:
+     from mantisdk.verl.daemon import AgentModeDaemon
+     from mantisdk.verl.trainer import MantisdkTrainer
+
+
+ class VERL(Algorithm):
+     """VERL-powered algorithm that delegates training to the VERL PPO runner.
+
+     !!! warning
+         Advanced customisation currently requires copying the VERL source and
+         modifying it directly. Native hooks for overriding training behaviour
+         will land in a future release.
+
+     Args:
+         config: Dictionary mirroring the overrides passed to the VERL CLI. The
+             overrides are merged with VERL's packaged defaults via Hydra before
+             launching training.
+         trainer_cls: Optional override for the trainer class. Experimental.
+         daemon_cls: Optional override for the daemon class. Experimental.
+
+     !!! note "Trajectory aggregation (experimental)"
+
+         Trajectory-level aggregation merges an entire multi-turn rollout into a single,
+         masked training sample so GPU time is spent once per trajectory rather than N times
+         per turn. Enable it via:
+
+         ```python
+         config["mantisdk"]["trace_aggregator"] = {
+             "level": "trajectory",
+             "trajectory_max_prompt_length": 4096,
+             "trajectory_max_response_length": 34384,
+         }
+         ```
+
+         Keep conversations structured (message lists rather than manual string
+         concatenation) so prefix matching can stitch traces. `trajectory_max_prompt_length`
+         should be set to the maximum length of the prompt for the first turn, and
+         `trajectory_max_response_length` should be set to the maximum cumulative
+         length of agent responses in the full trajectory.
+         Toggle `debug=True` plus `mismatch_log_dir` when you need to inspect
+         retokenization or chat-template mismatches. See
+         [this blog post](https://mantisdk.github.io/posts/trajectory_level_aggregation/)
+         for more details.
+
+     Examples:
+         ```python
+         from mantisdk.algorithm.verl import VERL
+
+         algorithm = VERL(
+             config={
+                 "algorithm": {
+                     "adv_estimator": "grpo",
+                     "use_kl_in_reward": False,
+                 },
+                 "data": {
+                     "train_batch_size": 32,
+                     "max_prompt_length": 4096,
+                     "max_response_length": 2048,
+                 },
+                 "actor_rollout_ref": {
+                     "rollout": {
+                         "tensor_model_parallel_size": 1,
+                         "n": 4,
+                         "log_prob_micro_batch_size_per_gpu": 4,
+                         "multi_turn": {"format": "hermes"},
+                         "name": "vllm",
+                         "gpu_memory_utilization": 0.6,
+                     },
+                     "actor": {
+                         "ppo_mini_batch_size": 32,
+                         "ppo_micro_batch_size_per_gpu": 4,
+                         "optim": {"lr": 1e-6},
+                         "use_kl_loss": False,
+                         "kl_loss_coef": 0.0,
+                         "entropy_coeff": 0,
+                         "clip_ratio_low": 0.2,
+                         "clip_ratio_high": 0.3,
+                         "fsdp_config": {
+                             "param_offload": True,
+                             "optimizer_offload": True,
+                         },
+                     },
+                     "ref": {
+                         "log_prob_micro_batch_size_per_gpu": 8,
+                         "fsdp_config": {"param_offload": True},
+                     },
+                     "model": {
+                         "path": "Qwen/Qwen2.5-1.5B-Instruct",
+                         "use_remove_padding": True,
+                         "enable_gradient_checkpointing": True,
+                     },
+                 },
+                 "trainer": {
+                     "n_gpus_per_node": 1,
+                     "val_before_train": True,
+                     "critic_warmup": 0,
+                     "logger": ["console", "wandb"],
+                     "project_name": "Mantisdk",
+                     "experiment_name": "calc_x",
+                     "nnodes": 1,
+                     "save_freq": 64,
+                     "test_freq": 32,
+                     "total_epochs": 2,
+                 },
+             }
+         )
+         trainer.fit(algorithm, train_dataset=my_train_dataset)
+         ```
+     """
+
+     def __init__(
+         self,
+         config: dict[str, Any],
+         trainer_cls: Optional[Type[MantisdkTrainer]] = None,
+         daemon_cls: Optional[Type[AgentModeDaemon]] = None,
+     ):
+         super().__init__()
+
+         # Compose the base config from the packaged VERL defaults.
+         with initialize(version_base=None, config_path="pkg://mantisdk/verl"):
+             base_cfg = compose(config_name="config")
+
+         # Merge the dict overrides.
+         override_conf = OmegaConf.create(config)
+         # Allow adding new fields
+         OmegaConf.set_struct(base_cfg, False)
+         self.config = OmegaConf.merge(base_cfg, override_conf)
+         self.trainer_cls = trainer_cls
+         self.daemon_cls = daemon_cls
+
+     def run(
+         self,
+         train_dataset: Optional[Dataset[Any]] = None,
+         val_dataset: Optional[Dataset[Any]] = None,
+     ) -> None:
+         """Launch the VERL PPO entrypoint with the configured runtime context.
+
+         Args:
+             train_dataset: Optional dataset forwarded to VERL for training.
+             val_dataset: Optional dataset forwarded to VERL for evaluation.
+
+         Raises:
+             ValueError: If required dependencies such as the store, LLM proxy, or
+                 adapter have been garbage-collected when using the V1 execution
+                 mode.
+         """
+         from mantisdk.verl.daemon import AgentModeDaemon
+         from mantisdk.verl.trainer import MantisdkTrainer
+
+         trainer_cls = self.trainer_cls or MantisdkTrainer
+         daemon_cls = self.daemon_cls or AgentModeDaemon
+         try:
+             store = self.get_store()
+         except Exception:
+             print("Store is not set. Assuming v0 execution mode.")
+             run_ppo(
+                 self.config,
+                 train_dataset=train_dataset,
+                 val_dataset=val_dataset,
+                 store=None,
+                 llm_proxy=None,
+                 adapter=None,
+                 trainer_cls=trainer_cls,
+                 daemon_cls=daemon_cls,
+             )
+         else:
+             print("Store is set. Assuming v1 execution mode.")
+             llm_proxy = self.get_llm_proxy()
+             adapter = self.get_adapter()
+             run_ppo(
+                 self.config,
+                 train_dataset=train_dataset,
+                 val_dataset=val_dataset,
+                 store=store,
+                 llm_proxy=llm_proxy,
+                 adapter=adapter,
+                 trainer_cls=trainer_cls,
+                 daemon_cls=daemon_cls,
+             )
+
+     def get_client(self) -> MantisdkClient:
+         """Create a client bound to the VERL-managed Mantisdk server.
+
+         Deprecated:
+             Since v0.2.
+         """
+         port = self.config.mantisdk.port
+         return MantisdkClient(endpoint=f"http://localhost:{port}")
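The constructor's override handling can be illustrated without Hydra: a standalone sketch of the `OmegaConf` merge step, using made-up config keys (`trainer.total_epochs`, `mantisdk.port`, etc. here are just placeholders for the real defaults the package ships).

```python
# Standalone illustration of the merge step in VERL.__init__ (OmegaConf only, no Hydra).
from omegaconf import OmegaConf

# Stand-in for the packaged defaults loaded via hydra.compose("config").
base_cfg = OmegaConf.create({"trainer": {"nnodes": 1, "total_epochs": 1}, "data": {"train_batch_size": 8}})
# Stand-in for the user-supplied `config` dict of overrides.
override = OmegaConf.create({"trainer": {"total_epochs": 2}, "mantisdk": {"port": 9999}})

OmegaConf.set_struct(base_cfg, False)  # permit keys the defaults don't declare (e.g. "mantisdk")
merged = OmegaConf.merge(base_cfg, override)

print(merged.trainer.total_epochs)   # 2    -> override wins
print(merged.data.train_batch_size)  # 8    -> default kept
print(merged.mantisdk.port)          # 9999 -> new section added by the override
```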