relai 0.3.2-py3-none-any.whl → 0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

relai/logger.py CHANGED
@@ -31,8 +31,11 @@ def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, AttributeValue]]:
                 yield f"{key}.{sub_key}", sub_value
         elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
             for index, sub_mapping in enumerate(value):
-                for sub_key, sub_value in flatten(sub_mapping):
-                    yield f"{key}.{index}.{sub_key}", sub_value
+                if isinstance(sub_mapping, Mapping):
+                    for sub_key, sub_value in flatten(sub_mapping):
+                        yield f"{key}.{index}.{sub_key}", sub_value
+                else:
+                    yield f"{key}.{index}", sub_mapping
         else:
             if isinstance(value, Enum):
                 value = value.value
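The fix above makes `flatten` tolerate lists that mix mappings with plain values, instead of recursing into every item. A simplified, runnable re-implementation mirroring the patched logic (the function tail after the `Enum` branch is not shown in the diff, so the final `yield` here is an assumption, and `AttributeValue` is replaced by `Any`):

```python
from enum import Enum
from typing import Any, Iterator, Mapping

def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, Any]]:
    # Simplified sketch of the patched logic in relai/logger.py.
    for key, value in mapping.items():
        if isinstance(value, Mapping):
            for sub_key, sub_value in flatten(value):
                yield f"{key}.{sub_key}", sub_value
        elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
            for index, sub_mapping in enumerate(value):
                if isinstance(sub_mapping, Mapping):  # new in 0.3.4: recurse only into mappings
                    for sub_key, sub_value in flatten(sub_mapping):
                        yield f"{key}.{index}.{sub_key}", sub_value
                else:  # plain items in a mixed list are now yielded directly
                    yield f"{key}.{index}", sub_mapping
        else:
            if isinstance(value, Enum):
                value = value.value
            yield key, value  # assumed tail; not shown in the hunk

# A list mixing a mapping with a plain string, which 0.3.2 recursed into blindly:
print(list(flatten({"errors": [{"code": 500}, "fatal"]})))
# [('errors.0.code', 500), ('errors.1', 'fatal')]
```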
relai/maestro/optimizer.py CHANGED
@@ -4,6 +4,9 @@ import json
 import os
 from datetime import datetime, timezone
 from typing import Any, Awaitable, Optional
+from uuid import uuid4
+
+from tqdm.auto import tqdm
 
 from relai import AsyncRELAI
 from relai.critico.critico import Critico, CriticoLog
@@ -28,7 +31,6 @@ class Maestro:
         agent_fn: AsyncAgent,
         goal: Optional[str] = None,
         max_memory: int = 20,
-        max_proposals: int = 3,
         name: str = "No Name",
         log_to_platform: bool = True,
     ):
@@ -40,8 +42,6 @@ class Maestro:
                 will be considered as the only goal. Defaults to None.
             max_memory (int, optional): Control the maximum number of previous optimization history visible at each
                 optimization step. Defaults to 20.
-            max_proposals (int, optional): Control the maximum number of proposals to consider at each optimization step.
-                Defaults to 3.
             name (str, optional): Name of the configuration optimization visualization on RELAI platform.
                 Defaults to "No Name".
             log_to_platform (bool): Whether to log optimization progress and results on RELAI platform.
@@ -53,7 +53,6 @@ class Maestro:
         self.max_memory: int = max_memory
         self._client: AsyncRELAI = client
         self.goal: str = goal if goal is not None else "Higher scores"
-        self.max_proposals: int = max_proposals
         self.log_to_platform: bool = log_to_platform
         self.config_opt_viz_id: str | None = None
         self.name: str = name
@@ -135,8 +134,7 @@ class Maestro:
         """
         self.total_visits += 1
         self.versions[self.current_version]["average_score"] = (
-            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"]
-            + score
+            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"] + score
         ) / (self.versions[self.current_version]["visits"] + 1.0)
         self.versions[self.current_version]["visits"] += 1
 
@@ -162,7 +160,7 @@ class Maestro:
         return str(agent_outputs)
 
     async def _evaluate(
-        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = True, print_flag: str = ""
+        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = False, print_flag: str = ""
     ) -> tuple[list[dict[str, Any]], list[AgentLog]]:
         """
         Run and evaluate the current version of the agent through a set of awaitables.
@@ -170,8 +168,8 @@ class Maestro:
         Args:
             awaitables (list[Awaitable]): A list of awaitables, each representing a run of the agent
             criticos (list[Critico]): A list of Critico objects, each corresponding to an awaitable
-            verbose (bool): If True, related information will be printed during evaluation.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during evaluation.
+                Defaults to False.
             print_flag (str): A string to be put next to the printed info when `verbose` is True.
                 Used to distinguish printed info from different types of evaluations.
 
@@ -215,15 +213,24 @@ class Maestro:
 
         if verbose:
             for test_case in test_cases:
-                print("input:\n", test_case["input"])
-                print(f"log{print_flag}:\n", test_case["log"])
-                print(f"output{print_flag}:\n", test_case["output"])
-                print(f"eval score{print_flag}:\n", test_case["eval_score"])
-                print(f"eval feedback{print_flag}:\n", test_case["eval_feedback"])
+                print("=================agent execution result===================")
+                print(f"- input:\n{test_case['input']}\n")
+                print(f"- log{print_flag}:\n{test_case['log']}\n")
+                print(f"- output{print_flag}:\n{test_case['output']}\n")
+                print(f"- eval score{print_flag}:\n{test_case['eval_score']}\n")
+                print(f"- eval feedback{print_flag}:\n{test_case['eval_feedback']}\n")
+                print("=========================================================\n\n")
 
         return test_cases, agent_logs
 
-    async def _iterate(self, batch_size: int, sampler: ProportionalSampler, verbose: bool = True) -> bool:
+    async def _iterate(
+        self,
+        batch_size: int,
+        sampler: ProportionalSampler,
+        verbose: bool = False,
+        group_id: str | None = None,
+        pbar: tqdm | None = None,
+    ) -> bool:
         """
         An iterate step will propose changes to the current version of the agent and
         conduct a preliminary examination of the proposed changes.
@@ -236,8 +243,11 @@ class Maestro:
                 i.e. `critico`, where `batch_size` of them will be used to propose changes and the other
                 `batch_size` of them will be used for preliminary examinations.
             sampler (ProportionalSampler): Sampler to use for selecting setups.
-            verbose (bool): If True, related information will be printed during the iterate step.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during the iterate step.
+                Defaults to False.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
+            pbar (tqdm, optional): A progress bar to display the progress of the iteration. Defaults to None.
 
         Returns:
             bool: True if the proposed changes pass the preliminary examination and False otherwise.
@@ -250,23 +260,27 @@ class Maestro:
                 "No setup (simulator, critico) has been added to Maestro. Please add at least one setup before optimization."
             )
 
+        group_id = uuid4().hex if group_id is None else group_id
+
         setups = sampler.sample(batch_size * 2)
         awaitables = []
         criticos = []
         for setup in setups:
             simulator = setup["simulator"]
             critico = setup["critico"]
-            awaitables.append(simulator.run(num_runs=1))
+            awaitables.append(simulator.run(num_runs=1, group_id=group_id))
             criticos.append(critico)
 
         test_cases, agent_logs = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+        if pbar is not None:
+            pbar.update(len(test_cases))
+
         analysis, proposed_values = await self._client.propose_values(
             {
                 "params": params.export(),
                 "serialized_past_proposals": self._serialize_past_proposals(),
                 "test_cases": test_cases[:batch_size],
-                "max_proposals": self.max_proposals,
                 "goal": self.goal,
                 "param_graph": param_graph.export(),
             }
@@ -276,14 +290,13 @@ class Maestro:
         for param, value in proposed_values.items():
             changes.append({"param": param, "previous value": params.__getattr__(param), "new value": value})
             if verbose:
-                print("--------------------------")
-                print("proposed param change:", param)
+                print("=" * 60)
+                print("- proposed param change:", param)
                 print("")
-                print("previous value:", params.__getattr__(param))
+                print("- previous value:\n\n", params.__getattr__(param))
                 print("")
-                print("new value:", value)
-                print("-----------\n")
-                print("--------------------------")
+                print("- new value:\n\n", value)
+                print("=" * 60)
 
         self.log.append({"proposal id": len(self.log), "proposed changes": changes})
 
@@ -297,13 +310,16 @@ class Maestro:
         for test_case, agent_log, setup in zip(test_cases, agent_logs, setups):
             simulator = setup["simulator"]
             critico = setup["critico"]
-            new_awaitables.append(simulator.rerun([agent_log.simulation_tape]))
+            new_awaitables.append(simulator.rerun([agent_log.simulation_tape], group_id=group_id))
             new_criticos.append(critico)
 
         test_cases_updated, _ = await self._evaluate(
             awaitables=new_awaitables, criticos=new_criticos, verbose=verbose, print_flag=" (changed)"
         )
 
+        if pbar is not None:
+            pbar.update(len(test_cases_updated))
+
         for sample_id in range(0, batch_size * 2):
             test_cases_updated[sample_id]["previous_log"] = test_cases[sample_id]["log"]
             test_cases_updated[sample_id]["previous_output"] = test_cases[sample_id]["output"]
@@ -350,24 +366,24 @@ class Maestro:
             print("new avg score: ", new_score)
             print("accepted: ", review_decision["accepted"])
             print("review comment:\n", review_decision["full comment"])
-            print("-------------------------------------------\n\n")
+            print("-" * 60 + "\n\n")
 
         return review_decision["accepted"]
 
     async def optimize_config(
         self,
         total_rollouts: int,
-        batch_size: int = 4,
+        batch_size: int = 8,
         explore_radius: int = 5,
         explore_factor: float = 0.5,
-        verbose: bool = True,
+        verbose: bool = False,
     ):
         """
         Optimize the configs (parameters) of the agent.
 
         Args:
             total_rollouts (int): Total number of rollouts to use for optimization.
-            batch_size (int): Base batch size to use for individual optimization steps. Defaults to 4.
+            batch_size (int): Base batch size to use for individual optimization steps. Defaults to 8.
             explore_radius (int): A positive integer controlling the aggressiveness of exploration during optimization.
                 A larger `explore_radius` encourages the optimizer to make more substantial changes between successive configurations.
                 Defaults to 5.
@@ -376,7 +392,7 @@ class Maestro:
                 while a lower value allocates more rollouts to ensure the discovered configs are thoroughly evaluated.
                 Defaults to 0.5.
             verbose (bool): If True, related information will be printed during the optimization step.
-                Defaults to True.
+                Defaults to False.
 
         Raises:
             ValueError: If the input parameters are not valid.
@@ -390,47 +406,56 @@ class Maestro:
         if explore_factor <= 0 or explore_factor >= 1:
             raise ValueError(f"`explore_factor` must be a float between 0 and 1, got {explore_factor}.")
 
-        # total_rollouts = (iterate_steps * batch_size * 4 + select_steps * batch_size) * num_rounds
-        # explore_factor = (iterate_steps * batch_size * 4) / (iterate_steps * batch_size * 4 + select_steps * batch_size)
+        group_size = (batch_size + 1) // 2
+        # total_rollouts = (iterate_steps * group_size * 4 + select_steps * group_size) * num_rounds
+        # explore_factor = (iterate_steps * group_size * 4) / (iterate_steps * group_size * 4 + select_steps * group_size)
         iterate_steps: int = explore_radius
         select_steps: int = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)
-        num_rounds: int = int(total_rollouts / (iterate_steps * batch_size * 4 + select_steps * batch_size))
-
-        if verbose:
-            print("optimize_config settings:")
-            print("  total_rollouts: ", total_rollouts)
-            print("  batch_size: ", batch_size)
-            print("  explore_radius: ", explore_radius)
-            print("  explore_factor: ", explore_factor)
-            print("-------------------------------------------")
-            print("  iterate_steps: ", iterate_steps)
-            print("  select_steps: ", select_steps)
-            print("  num_rounds: ", num_rounds)
-            print("-------------------------------------------\n\n")
+        num_rounds: int = int(total_rollouts / (iterate_steps * group_size * 4 + select_steps * group_size))
+        total_rollouts = num_rounds * (iterate_steps * group_size * 4 + select_steps * group_size)
+
+        print("optimize_config settings:")
+        print("  total_rollouts: ", total_rollouts)
+        print("  (adjusted) batch_size: ", group_size * 2)
+        print("  explore_radius: ", explore_radius)
+        print("  explore_factor: ", explore_factor)
+        print("-" * 60)
+        print("  iterate_steps: ", iterate_steps)
+        print("  select_steps: ", select_steps)
+        print("  num_rounds: ", num_rounds)
+        print("=" * 80 + "\n\n")
 
         if num_rounds == 0:
             raise ValueError(
                 f"`total_rollouts` is too small for the given `batch_size` {batch_size}, `explore_radius` {explore_radius}, and `explore_factor` {explore_factor}. "
-                f"Please increase `total_rollouts` to at least {iterate_steps * batch_size * 4 + select_steps * batch_size}."
+                f"Please increase `total_rollouts` to at least {iterate_steps * group_size * 4 + select_steps * group_size}."
             )
 
         sampler = ProportionalSampler(
             elements=self.setups,
             weights=[setup["weight"] for setup in self.setups],
         )
+        group_id = "Maestro-Config-" + uuid4().hex
+        pbar = tqdm(total=total_rollouts, desc="Total rollouts consumed for config optimization")
 
         for round in range(num_rounds):
-            if verbose:
-                print(f"================== Round {round + 1}/{num_rounds} ==================")
-                print("Total versions: ", len(self.versions))
-                print("Rebase to version: ", self.current_version)
-                print("Score (current base): ", self.versions[self.current_version]["average_score"])
-                print("Visits (current base): ", self.versions[self.current_version]["visits"])
-                print("Visits (total): ", self.total_visits)
+            print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} begins" + "=" * 30)
+            print("Total versions accepted: ", len(self.versions))
+            print("Rebase to version: ", self.current_version)
+            print(
+                "Score for the current base version: %s based on %s rollouts"
+                % (
+                    self.versions[self.current_version]["average_score"],
+                    self.versions[self.current_version]["visits"] * group_size,
                )
+            )
+            print("\n\n")
 
             new_version = False
             for _ in range(iterate_steps):
-                changes_accepted = await self._iterate(batch_size=batch_size, verbose=verbose, sampler=sampler)
+                changes_accepted = await self._iterate(
+                    batch_size=group_size, verbose=verbose, sampler=sampler, group_id=group_id, pbar=pbar
+                )
                 if changes_accepted:
                     new_version = True
 
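For intuition on the rollout budget introduced above, here is the arithmetic with the new defaults, mirroring the formulas in the hunk (plain Python; the chosen `total_rollouts` value is illustrative):

```python
batch_size, explore_radius, explore_factor = 8, 5, 0.5

group_size = (batch_size + 1) // 2   # 4
iterate_steps = explore_radius       # 5
select_steps = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)  # 20

# One round costs: iterate rollouts (original + rerun, 2 * batch of 2*group_size)
# plus validation rollouts for the select steps.
cost_per_round = iterate_steps * group_size * 4 + select_steps * group_size  # 80 + 80 = 160

# The new code also rounds total_rollouts down to a whole number of rounds:
total_rollouts = 320
num_rounds = int(total_rollouts / cost_per_round)  # 2
total_rollouts = num_rounds * cost_per_round       # 320

print(group_size, iterate_steps, select_steps, cost_per_round, num_rounds)
```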
@@ -453,19 +478,22 @@ class Maestro:
             for _ in range(select_steps):
                 await self._select(explore=True)
 
-            setups = sampler.sample(batch_size)
+            setups = sampler.sample(group_size)
             awaitables = []
             criticos = []
             for setup in setups:
                 simulator = setup["simulator"]
                 critico = setup["critico"]
-                awaitables.append(simulator.run(num_runs=1))
+                awaitables.append(simulator.run(num_runs=1, group_id=group_id))
                 criticos.append(critico)
 
             test_cases_validation, _ = await self._evaluate(
                 awaitables=awaitables, criticos=criticos, verbose=verbose, print_flag="(validation)"
             )
 
+            if pbar is not None:
+                pbar.update(len(test_cases_validation))
+
             validation_score = 0.0
             for test_case in test_cases_validation:
                 validation_score += test_case["eval_score"]
@@ -493,21 +521,26 @@ class Maestro:
 
             # Switch to the current version with highest score
             await self._select(explore=False)
-            if verbose:
-                print("Total versions: ", len(self.versions))
-                print("Best version: ", self.current_version)
-                print("Score (best version): ", self.versions[self.current_version]["average_score"])
-                print("Visits (best version): ", self.versions[self.current_version]["visits"])
-                print("Visits (total): ", self.total_visits)
 
-                print(
-                    "all versions: ",
-                    {
-                        i: {"score": self.versions[i]["average_score"], "visits": self.versions[i]["visits"]}
-                        for i in range(len(self.versions))
-                    },
+            print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} finishes" + "=" * 30)
+            print("Total versions accepted: ", len(self.versions))
+            print("Best version index: ", self.current_version)
+            print(
+                "Score for the best version: %s based on %s rollouts"
+                % (
+                    self.versions[self.current_version]["average_score"],
+                    self.versions[self.current_version]["visits"] * group_size,
                 )
-                print("--------------------")
+            )
+
+            print(
+                "All versions: ",
+                {
+                    i: {"score": self.versions[i]["average_score"], "rollouts evaluated": self.versions[i]["visits"] * group_size}
+                    for i in range(len(self.versions))
+                },
+            )
+            print("--------------------")
 
@@ -543,18 +576,16 @@ class Maestro:
 
             if self.log_to_platform:
                 await sync_to_platform()
-                if verbose:
-                    print(
-                        f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
-                    )
+                print(
+                    f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
+                )
 
     async def optimize_structure(
         self,
         total_rollouts: int,
         description: Optional[str] = None,
         code_paths: Optional[list[str]] = None,
-        name: str = "No Name",
-        verbose: bool = True,
+        verbose: bool = False,
     ) -> str:
         """
         Propose structural changes (i.e. changes that cannot be achieved by setting parameters alone) to
@@ -567,15 +598,17 @@ class Maestro:
             description (str, optional): Text description of the current structure/workflow/... of the agent.
             code_paths (list[str], optional): A list of paths corresponding to code files containing
                 the implementation of the agent.
-            name (str, optional): Name of the graph optimization visualization on RELAI platform.
-                Defaults to "No Name".
-            verbose (bool): If True, related information will be printed during the optimization.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during the optimization.
+                Defaults to False.
 
         Returns:
             str: Suggestion for structural changes to the agent.
         """
 
+        print("optimize_structure settings:")
+        print("  total_rollouts: ", total_rollouts)
+        print("=" * 80 + "\n\n")
+
         if code_paths is not None:
             code = extract_code(code_paths=code_paths)
         else:
@@ -585,17 +618,24 @@ class Maestro:
             elements=self.setups,
             weights=[setup["weight"] for setup in self.setups],
         )
+        group_id = "Maestro-Struct-" + uuid4().hex
+
+        print("=" * 80)
+        print("Running the agent to collect traces...\n\n")
+
         setups = sampler.sample(total_rollouts)
         awaitables = []
         criticos = []
         for setup in setups:
             simulator = setup["simulator"]
             critico = setup["critico"]
-            awaitables.append(simulator.run(num_runs=1))
+            awaitables.append(simulator.run(num_runs=1, group_id=group_id))
             criticos.append(critico)
 
         test_cases, _ = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+        print("=" * 80)
+        print("Optimizing structure...\n\n")
         suggestion = await self._client.optimize_structure(
             {
                 "agent_name": get_full_func_name(self.agent_fn),
@@ -611,7 +651,7 @@ class Maestro:
 
         async def sync_to_platform():
             payload = GraphOptVizSchema(
-                name=name,
+                name=self.name,
                 proposal=suggestion,
                 runs=[
                     RunSchema(
@@ -628,12 +668,12 @@ class Maestro:
 
             return await self._client.update_graph_opt_visual(payload)
 
+        print("=" * 40 + "suggestion" + "=" * 40)
+        print(suggestion)
+        print("=" * 90 + "\n\n")
+
         if self.log_to_platform:
             uid = await sync_to_platform()
-            if verbose:
-                print(f"Results uploaded to RELAI platform, visualization id: {uid}")
-
-        if verbose:
-            print("suggestion:\n", suggestion)
+            print(f"Results uploaded to RELAI platform, visualization id: {uid}")
 
         return suggestion
relai/mocker/persona.py CHANGED
@@ -141,16 +141,18 @@ class PersonaSet(Sequence[Persona]):
     A collection of Persona instances loaded from a persona set on the RELAI platform.
     """
 
-    def __init__(self, persona_set_id: str) -> None:
+    def __init__(self, persona_set_id: str, **persona_kwargs: Any) -> None:
        """
        Initializes the PersonaSet with the given persona set ID.

        Args:
            persona_set_id (str): The ID of the persona set on the RELAI platform.
+            **persona_kwargs: Keyword arguments that are forwarded to each Persona created from the set.
        """
        self.persona_set_id = persona_set_id
        self._user_personas = None
        self._personas = None
+        self._persona_kwargs = persona_kwargs
 
    def user_personas(self) -> list[str]:
        if self._user_personas is None:
@@ -161,7 +163,9 @@ class PersonaSet(Sequence[Persona]):
 
    def personas(self) -> list[Persona]:
        if self._personas is None:
-            self._personas = [Persona(user_persona=persona) for persona in self.user_personas()]
+            self._personas = [
+                Persona(user_persona=persona, **self._persona_kwargs) for persona in self.user_personas()
+            ]
        return self._personas
 
    @overload
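The `**persona_kwargs` pass-through means options set once on a `PersonaSet` apply to every `Persona` it creates. A hedged usage sketch: the set ID is hypothetical, and `model` is an assumed keyword; pass whatever keyword arguments your `Persona` version actually accepts:

```python
from relai.mocker.persona import PersonaSet

# "ps-12345" is a hypothetical set ID; `model` is an assumed Persona kwarg.
persona_set = PersonaSet("ps-12345", model="gpt-5-mini")

# Every Persona built from the set now carries the forwarded kwargs.
personas = persona_set.personas()
```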
relai/mocker/tool.py CHANGED
@@ -4,6 +4,7 @@ from uuid import uuid4
 
 from agents import Agent, Runner, SQLiteSession
 
+from ..utils import no_trace
 from .base_mocker import BaseMocker
 
 
@@ -48,12 +49,12 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-        result = Runner.run_sync(
-            self.agent,
-            agent_input,
-            session=self._session,
-        )
+        with no_trace():
+            result = Runner.run_sync(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
 
@@ -64,11 +65,11 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-        result = await Runner.run(
-            self.agent,
-            agent_input,
-            session=self._session,
-        )
+        with no_trace():
+            result = await Runner.run(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
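`MockTool` now wraps its internal LLM calls in `no_trace()` so the mock's own model turns do not pollute the agent's trace. The diff does not show `no_trace` itself; below is a minimal sketch of the general pattern such a helper typically follows (a context manager toggling a module-level flag), not relai's actual implementation:

```python
from contextlib import contextmanager

_TRACING_ENABLED = True  # hypothetical stand-in for the SDK's internal tracing state

@contextmanager
def no_trace():
    # Temporarily disable tracing, restoring the previous state on exit.
    global _TRACING_ENABLED
    previous, _TRACING_ENABLED = _TRACING_ENABLED, False
    try:
        yield
    finally:
        _TRACING_ENABLED = previous
```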
relai/simulator.py CHANGED
@@ -203,15 +203,17 @@ class SyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    def run(self, num_runs: int) -> list[AgentLog]:
+    def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """
         Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -235,16 +237,18 @@ class SyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
@@ -299,14 +303,16 @@ class AsyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    async def run(self, num_runs: int) -> list[AgentLog]:
+    async def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -330,16 +336,18 @@ class AsyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    async def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    async def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
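With `group_id` threaded through `run` and `rerun`, a caller can keep an original run and its tape-for-tape replay under one group. A short sketch, assuming an already configured `AsyncSimulator` instance:

```python
from uuid import uuid4

async def replay_under_one_group(simulator) -> None:
    # Same "Simulate-" prefix convention the SDK uses when no group_id is given.
    group_id = "Simulate-" + uuid4().hex
    agent_logs = await simulator.run(num_runs=4, group_id=group_id)
    # Replays of the same tapes now land in the original runs' group.
    await simulator.rerun([log.simulation_tape for log in agent_logs], group_id=group_id)
```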
relai/utils.py CHANGED
@@ -34,20 +34,57 @@ def create_logging_span(logger_id: str | None = None):
 
 
 def log_model(*args, **kwargs):
+    """
+    Logs a model call event.
+
+    Args:
+        name (str): Name of the model.
+        input (Any): Input to the model.
+        output (Any): Output from the model.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_model(*args, **kwargs)
 
 
 def log_tool(*args, **kwargs):
+    """
+    Logs a tool call event.
+
+    Args:
+        name (str): Name of the tool.
+        input (Any): Input to the tool.
+        output (Any): Output from the tool.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_tool(*args, **kwargs)
 
 
 def log_persona(*args, **kwargs):
+    """
+    Logs a persona activity.
+
+    Args:
+        name (str): Name of the persona.
+        model_name (str): Name of the model.
+        input (Any): Input to the persona.
+        output (Any): Output from the persona.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_persona(*args, **kwargs)
 
 
 def log_router(*args, **kwargs):
+    """
+    Logs a router event.
+
+    Args:
+        name (str): Name of the router.
+        input (Any): Input to the router.
+        output (Any): Output from the router.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_router(*args, **kwargs)
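The helpers documented above log against whatever logger is active in the current span. A hedged usage sketch based on the documented signatures (the argument values are illustrative):

```python
from relai.utils import create_logging_span, log_model, log_tool

with create_logging_span():
    # Keyword names follow the docstrings added above; values are illustrative.
    log_model(name="gpt-5-mini", input="What is AAPL trading at?", output="$258.02", note="stock Q&A")
    log_tool(name="get_stock_price", input={"ticker": "AAPL"}, output=258.02)
```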
relai-0.3.2.dist-info/METADATA → relai-0.3.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: relai
-Version: 0.3.2
+Version: 0.3.4
 Summary: An SDK for building reliable AI agents
 Author-email: RELAI <priyatham@relai.ai>, RELAI <wwx@relai.ai>
 License: Apache License
@@ -205,12 +205,12 @@ License: Apache License
  See the License for the specific language governing permissions and
  limitations under the License.
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.9
+Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: pydantic>=2.11.5
@@ -226,14 +226,22 @@ Dynamic: license-file
   <img align="center" src="docs/assets/relai-logo.png" width="460px" />
 </p>
 <p align="left">
-  <h1 align="center">RELAI: Simulate → Evaluate → Optimize AI Agents</h1>
+  <h1 align="center">Simulate → Evaluate → Optimize AI Agents</h1>
+  <p align="center">
+    <a href="https://pypi.org/project/relai/"><img alt="PyPI" src="https://img.shields.io/pypi/v/relai.svg"></a>
+    <img alt="Python" src="https://img.shields.io/pypi/pyversions/relai.svg">
+    <a href="LICENSE.md"><img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue.svg"></a>
+    <a href="http://docs.relai.ai"><img alt="Docs" src="https://img.shields.io/badge/docs-online-brightgreen.svg"></a>
+    <a href="https://github.com/relai-ai/relai-sdk/actions/workflows/upload-to-package-index.yml"><img alt="CI" src="https://img.shields.io/github/actions/workflow/status/relai-ai/relai-sdk/upload-to-package-index.yml?branch=main"></a>
+  </p>
+
 
 **RELAI** is an SDK for building **reliable AI agents**. It streamlines the hardest parts of agent development—**simulation**, **evaluation**, and **optimization**—so you can iterate quickly with confidence.
 
 **What you get**
-- **Agent Simulation** — Create full/partial environments, define **LLM personas**, mock **MCP** servers & tools, and generate **synthetic data**. Optionally **condition simulation on real samples** to better match production.
-- **Agent Evaluation** — Mix **code-based** and **LLM-based** custom evaluators or use **RELAI platform evaluators**. Turn human reviews into **benchmarks** you can re-run.
-- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve prompts/configs **and** suggest **graph-level** changes. Also selects **best model/tool/graph** based on observed performance.
+- **Agent Simulation** — Create full/partial environments, define LLM personas, mock MCP servers & tools, and generate synthetic data. Optionally condition simulation on real samples to better match production.
+- **Agent Evaluation** — Mix code-based and LLM-based custom evaluators or use RELAI platform evaluators. Turn human reviews into benchmarks you can re-run.
+- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve **prompts/configs** and suggest **graph-level** changes. Maestro selects best model/tool/graph based on observed performance.
 
 ## Quickstart
 
@@ -249,10 +257,161 @@ uv add relai
 export RELAI_API_KEY="<RELAI_API_KEY>"
 ```
 
+### Example: A simple Stock Assistant Agent (Simulate → Evaluate → Optimize)
+Prerequisites: an OpenAI API key and `openai-agents` installed to run the base agent.
+To use the Maestro graph optimizer, save the following in a file called `stock-assistant.py` (or change the `code_paths` argument to `maestro.optimize_structure`).
+```python
+# ============================================================================
+# STEP 0 — Prerequisites
+# ============================================================================
+# export OPENAI_API_KEY="sk-..."
+# `uv add openai-agents`
+# export RELAI_API_KEY="relai-..."
+# Save as `stock-assistant.py`
+
+import asyncio
+
+from agents import Agent, Runner
+
+from relai import (
+    AgentOutputs,
+    AsyncRELAI,
+    AsyncSimulator,
+    SimulationTape,
+    random_env_generator,
+)
+from relai.critico import Critico
+from relai.critico.evaluate import RELAIFormatEvaluator
+from relai.maestro import Maestro, params, register_param
+from relai.mocker import Persona
+from relai.simulator import simulated
+
+# ============================================================================
+# STEP 1.1 — Decorate inputs/tools that will be simulated
+# ============================================================================
+
+
+@simulated
+async def get_user_query() -> str:
+    """Get user's query about stock prices."""
+    # In a real agent, this function might get input from a chat interface.
+    return input("Enter your stock query: ")
+
+
+# ============================================================================
+# STEP 1.2 — Register parameters for optimization
+# ============================================================================
+
+register_param(
+    "prompt",
+    type="prompt",
+    init_value="You are a helpful assistant for stock price questions.",
+    desc="system prompt for the agent",
+)
+
+# ============================================================================
+# STEP 2 — Your agent core
+# ============================================================================
+
+
+async def agent_fn(tape: SimulationTape) -> AgentOutputs:
+    # It is good practice to catch exceptions in the agent function,
+    # especially if the agent might raise errors with different configs
+    try:
+        question = await get_user_query()
+        agent = Agent(
+            name="Stock assistant",
+            instructions=params.prompt,  # access registered parameter
+            model="gpt-5-mini",
+        )
+        result = await Runner.run(agent, question)
+        tape.extras["format_rubrics"] = {"Prices must include cents (eg: $XXX.XX)": 1.0}
+        tape.agent_inputs["question"] = question  # trace inputs for later auditing
+        return {"summary": result.final_output}
+    except Exception as e:
+        return {"summary": str(e)}
+
+
+async def main() -> None:
+    # Set up your simulation environment
+    # Bind Personas/MockTools to fully-qualified function names
+    env_generator = random_env_generator(
+        config_set={
+            "__main__.get_user_query": [Persona(user_persona="A polite and curious user.")],
+        }
+    )
+
+    async with AsyncRELAI() as client:
+        # ============================================================================
+        # STEP 3 — Simulate
+        # ============================================================================
+        simulator = AsyncSimulator(agent_fn=agent_fn, env_generator=env_generator, client=client)
+        agent_logs = await simulator.run(num_runs=1)
+
+        # ============================================================================
+        # STEP 4 — Evaluate with Critico
+        # ============================================================================
+        critico = Critico(client=client)
+        format_evaluator = RELAIFormatEvaluator(client=client)
+        critico.add_evaluators({format_evaluator: 1.0})
+        critico_logs = await critico.evaluate(agent_logs)
+
+        # Publish evaluation report to the RELAI platform
+        await critico.report(critico_logs)
+
+        maestro = Maestro(client=client, agent_fn=agent_fn, log_to_platform=True, name="Stock assistant")
+        maestro.add_setup(simulator=simulator, critico=critico)
+
+        # ============================================================================
+        # STEP 5.1 — Optimize configs with Maestro (the parameters registered earlier in STEP 1.2)
+        # ============================================================================
+
+        # params.load("saved_config.json")  # load previous params if available
+        await maestro.optimize_config(
+            total_rollouts=20,  # Total number of rollouts to use for optimization.
+            batch_size=2,  # Base batch size to use for individual optimization steps. Defaults to 8.
+            explore_radius=1,  # A positive integer controlling the aggressiveness of exploration during optimization.
+            explore_factor=0.5,  # A float between 0 and 1 controlling the exploration-exploitation trade-off.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+        params.save("saved_config.json")  # save optimized params for future usage
+
+        # ============================================================================
+        # STEP 5.2 — Optimize agent structure with Maestro (changes that cannot be achieved by setting parameters alone)
+        # ============================================================================
+
+        await maestro.optimize_structure(
+            total_rollouts=10,  # Total number of rollouts to use for optimization.
+            code_paths=["stock-assistant.py"],  # A list of paths corresponding to code implementations of the agent.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
+```
+## Simulation
+Create controlled environments where agents interact and generate traces. Compose LLM personas, mock MCP tools/servers, and synthetic data; optionally condition on real events to align simulation ⇄ production.
+
+➡️ Learn more: [Simulator](https://docs.relai.ai/simulator.html)
+
+## Evaluation (Critico)
+Use code-based or LLM-based evaluators—or RELAI platform evaluators—and convert human reviews into benchmarks you can re-run in Simulation/CI pipelines.
+
+➡️ Learn more: [Evaluator](https://docs.relai.ai/evaluator.html)
+
+## Optimization (Maestro)
+Maestro is a holistic agent optimizer. It consumes evaluator/user feedback to improve prompts, configs, and even graph structure when prompt tuning isn’t enough. It can also select the best model, best tool, and best graph based on observed performance.
+
+➡️ Learn more: [Maestro](https://docs.relai.ai/maestro.html)
+
 ## Links
 
-- 📘 **Documentation:** [docs.relai.ai](#)
+- 📘 **Documentation:** [docs.relai.ai](http://docs.relai.ai)
 - 🧪 **Examples:** [relai-sdk/examples](examples)
+- 📖 **Tutorials:** [docs.relai.ai/tutorials/index.html](https://docs.relai.ai/tutorials/index.html)
 - 🌐 **Website:** [relai.ai](https://relai.ai)
 - 📰 **Maestro Technical Report:** [arXiv](https://arxiv.org/abs/2509.04642)
 - 🌐 **Join the Community:** [Discord](https://discord.gg/sjaHJ34YYE)
@@ -260,3 +419,30 @@ export RELAI_API_KEY="<RELAI_API_KEY>"
 ## License
 
 Apache 2.0
+
+## Citation
+If you use the SDK in your research, please consider citing our work:
+
+```
+@misc{relai_sdk,
+  author = {RELAI, Inc.},
+  title = {relai-sdk},
+  year = {2025},
+  howpublished = {\url{https://github.com/relai-ai/relai-sdk}},
+  note = {GitHub repository},
+  urldate = {2025-10-20}
+}
+
+@misc{wang2025maestrojointgraph,
+  title={Maestro: Joint Graph & Config Optimization for Reliable AI Agents},
+  author={Wenxiao Wang and Priyatham Kattakinda and Soheil Feizi},
+  year={2025},
+  eprint={2509.04642},
+  archivePrefix={arXiv},
+  primaryClass={cs.AI},
+  url={https://arxiv.org/abs/2509.04642},
+}
+```
+
+<p align="center"> <sub>Made with ❤️ by the RELAI team — <a href="https://relai.ai">relai.ai</a> • <a href="https://discord.gg/sjaHJ34YYE">Community</a></sub> </p>
+
relai-0.3.2.dist-info/RECORD → relai-0.3.4.dist-info/RECORD CHANGED
@@ -5,24 +5,24 @@ relai/benchmark.py,sha256=YTd2xu9aKlUcaWdHInV_7U5YroivYMgTk7UE1XMZBN4,15766
 relai/data.py,sha256=ne0H4EQ0B_yxE9fogoovGExuJuwqutSpuhNsl4UmcsU,7852
 relai/exporter.py,sha256=jZxrUjlYCOpRr7gdmbg6-LUL_fXmtMgPp89CgvP5Z7A,1932
 relai/flags.py,sha256=_GrjQg7mZq7BwEIedR6cjWY4grwsryqbKdgyiRr2P7k,1929
-relai/logger.py,sha256=YfS8U4P89iYz4BsV1717ND6JKgOYDO_dN53207tVkLw,18219
-relai/simulator.py,sha256=FqPvKz3nsT-u61t0Y8L8QikG6LOFxrVvhC9NCTMvWgs,15533
-relai/utils.py,sha256=nUmnMAi_2NoYO9u4hhS6D-AG2HG6TymwHpuI8XrND0Y,1385
+relai/logger.py,sha256=j6PdzNkltukWAqBGKAB2qH2p61kS60RwsupDz-gELB4,18358
+relai/simulator.py,sha256=oEC5oLODPo1vLGBaMUdDj0JovZlc595dez931ihDuXk,16465
+relai/utils.py,sha256=va3xz79NTLJiZKaBrS_3Y8dC4M_JEmf8uOwzwFYYqUU,2359
 relai/critico/__init__.py,sha256=c_mDXCVEzsQckDS4ZFOmANo8vB5Vjr1bvyQNimAPVR8,52
 relai/critico/critico.py,sha256=J1ek9v2J5WBnHnZknZEVppIrWGczVHxuRX7ghK6mpXM,7616
 relai/critico/evaluate.py,sha256=Bd-Hlsh2fz2AQ0SINoyqcdpdbWK2t8yrAPHv6UCueFY,31348
 relai/maestro/__init__.py,sha256=NVXy0v7yghGwGbtsPti4gQGtVA3vMgXdpIpiJUesqME,186
 relai/maestro/graph.py,sha256=SyY0rHzes3o5bSqlK66CQDUAeyChUhWJQM3FzJCBvfs,1850
-relai/maestro/optimizer.py,sha256=PABMEFIcHwDSun-d2qBfvDHS7gHw4odkBJISgpkG8b0,28240
+relai/maestro/optimizer.py,sha256=96rFxXN5bNDCSgOOPywbWk5AbbnJ6ncLK_Z2uh66sdU,29413
 relai/maestro/params.py,sha256=-0Dtk23ClHJR6Q-PsaKr-GwUylz0-BIIquJF2eA-p-I,8925
 relai/maestro/utils.py,sha256=WIE3cR8EMDVfAJozEfngh8DfOQdRPZMxxtN-M1cMmxo,7276
 relai/mocker/__init__.py,sha256=JP2xlSG6Szc0tSEiZzCN6UXdE66uy7AmRn-p358xFVM,102
 relai/mocker/base_mocker.py,sha256=BL4WYtdxWHZdKICfo9idW5i5MrkoxJDElcoeGk-jaJM,994
-relai/mocker/persona.py,sha256=VwAjRTIvzZ7AVGZdswuxO5F6tvIP0czrnHEbt7X7O6w,6450
-relai/mocker/tool.py,sha256=wgbmOOTlpVClDMWzfuJfsrNwGI99k9CwzjoaRMLkAyo,2112
+relai/mocker/persona.py,sha256=q2A_lwYrp7H6sKkguMIPl7FQ_6pL4kTaxGBJ1kU2aGA,6678
+relai/mocker/tool.py,sha256=dHXkVcD9D6HMNlBj13V7GTgW_99a_-3tf9rC6iLDFn8,2229
 relai/schema/visual.py,sha256=Y6BP5CHxLU0e7sTfNjgKmG2GD0R9a8rvITusxd-d-UE,2443
-relai-0.3.2.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
-relai-0.3.2.dist-info/METADATA,sha256=q8bEvJMTJ7WDElFls0mXZXWwqYe4grFJsVC3HK-bMEE,15129
-relai-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-relai-0.3.2.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
-relai-0.3.2.dist-info/RECORD,,
+relai-0.3.4.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
+relai-0.3.4.dist-info/METADATA,sha256=VYH3VpOpZUXP7K-EQzu_cfhUrLDBOk5bICMTDs72Z54,23531
+relai-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+relai-0.3.4.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
+relai-0.3.4.dist-info/RECORD,,