relai 0.3.2-py3-none-any.whl → 0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

relai/logger.py CHANGED
@@ -31,8 +31,11 @@ def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, AttributeValue]]:
                 yield f"{key}.{sub_key}", sub_value
         elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
             for index, sub_mapping in enumerate(value):
-                for sub_key, sub_value in flatten(sub_mapping):
-                    yield f"{key}.{index}.{sub_key}", sub_value
+                if isinstance(sub_mapping, Mapping):
+                    for sub_key, sub_value in flatten(sub_mapping):
+                        yield f"{key}.{index}.{sub_key}", sub_value
+                else:
+                    yield f"{key}.{index}", sub_mapping
         else:
             if isinstance(value, Enum):
                 value = value.value
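The fix above makes `flatten` tolerate lists that mix mappings with plain values, instead of recursing into every item. A simplified, runnable re-implementation mirroring the patched logic (the function tail after the `Enum` branch is not shown in the diff, so the final `yield` here is an assumption, and `AttributeValue` is replaced by `Any`):

```python
from enum import Enum
from typing import Any, Iterator, Mapping

def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, Any]]:
    # Simplified sketch of the patched logic in relai/logger.py.
    for key, value in mapping.items():
        if isinstance(value, Mapping):
            for sub_key, sub_value in flatten(value):
                yield f"{key}.{sub_key}", sub_value
        elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
            for index, sub_mapping in enumerate(value):
                if isinstance(sub_mapping, Mapping):  # new in 0.3.4: recurse only into mappings
                    for sub_key, sub_value in flatten(sub_mapping):
                        yield f"{key}.{index}.{sub_key}", sub_value
                else:  # plain items in a mixed list are now yielded directly
                    yield f"{key}.{index}", sub_mapping
        else:
            if isinstance(value, Enum):
                value = value.value
            yield key, value  # assumed tail; not shown in the hunk

# A list mixing a mapping with a plain string, which 0.3.2 recursed into blindly:
print(list(flatten({"errors": [{"code": 500}, "fatal"]})))
# [('errors.0.code', 500), ('errors.1', 'fatal')]
```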
relai/maestro/optimizer.py CHANGED
@@ -4,6 +4,9 @@ import json
 import os
 from datetime import datetime, timezone
 from typing import Any, Awaitable, Optional
+from uuid import uuid4
+
+from tqdm.auto import tqdm
 
 from relai import AsyncRELAI
 from relai.critico.critico import Critico, CriticoLog
@@ -28,7 +31,6 @@ class Maestro:
         agent_fn: AsyncAgent,
         goal: Optional[str] = None,
         max_memory: int = 20,
-        max_proposals: int = 3,
         name: str = "No Name",
         log_to_platform: bool = True,
     ):
@@ -40,8 +42,6 @@ class Maestro:
                 will be considered as the only goal. Defaults to None.
             max_memory (int, optional): Control the maximum number of previous optimization history visible at each
                 optimization step. Defaults to 20.
-            max_proposals (int, optional): Control the maximum number of proposals to consider at each optimization step.
-                Defaults to 3.
             name (str, optional): Name of the configuration optimization visualization on RELAI platform.
                 Defaults to "No Name".
             log_to_platform (bool): Whether to log optimization progress and results on RELAI platform.
@@ -53,7 +53,6 @@ class Maestro:
         self.max_memory: int = max_memory
         self._client: AsyncRELAI = client
         self.goal: str = goal if goal is not None else "Higher scores"
-        self.max_proposals: int = max_proposals
         self.log_to_platform: bool = log_to_platform
         self.config_opt_viz_id: str | None = None
         self.name: str = name
@@ -135,8 +134,7 @@ class Maestro:
         """
         self.total_visits += 1
         self.versions[self.current_version]["average_score"] = (
-            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"]
-            + score
+            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"] + score
         ) / (self.versions[self.current_version]["visits"] + 1.0)
         self.versions[self.current_version]["visits"] += 1
 
@@ -162,7 +160,7 @@ class Maestro:
         return str(agent_outputs)
 
     async def _evaluate(
-        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = True, print_flag: str = ""
+        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = False, print_flag: str = ""
     ) -> tuple[list[dict[str, Any]], list[AgentLog]]:
         """
         Run and evaluate the current version of the agent through a set of awaitables.
@@ -170,8 +168,8 @@ class Maestro:
         Args:
             awaitables (list[Awaitable]): A list of awaitables, each representing a run of the agent
             criticos (list[Critico]): A list of Critico objects, each corresponding to an awaitable
-            verbose (bool): If True, related information will be printed during evaluation.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during evaluation.
+                Defaults to False.
             print_flag (str): A string to be put next to the printed info when `verbose` is True.
                 Used to distinguish printed info from different types of evaluations.
 
@@ -215,15 +213,24 @@ class Maestro:
 
         if verbose:
             for test_case in test_cases:
-                print("input:\n", test_case["input"])
-                print(f"log{print_flag}:\n", test_case["log"])
-                print(f"output{print_flag}:\n", test_case["output"])
-                print(f"eval score{print_flag}:\n", test_case["eval_score"])
-                print(f"eval feedback{print_flag}:\n", test_case["eval_feedback"])
+                print("=================agent execution result===================")
+                print(f"- input:\n{test_case['input']}\n")
+                print(f"- log{print_flag}:\n{test_case['log']}\n")
+                print(f"- output{print_flag}:\n{test_case['output']}\n")
+                print(f"- eval score{print_flag}:\n{test_case['eval_score']}\n")
+                print(f"- eval feedback{print_flag}:\n{test_case['eval_feedback']}\n")
+                print("=========================================================\n\n")
 
         return test_cases, agent_logs
 
-    async def _iterate(self, batch_size: int, sampler: ProportionalSampler, verbose: bool = True) -> bool:
+    async def _iterate(
+        self,
+        batch_size: int,
+        sampler: ProportionalSampler,
+        verbose: bool = False,
+        group_id: str | None = None,
+        pbar: tqdm | None = None,
+    ) -> bool:
         """
         An iterate step will propose changes to the current version of the agent and
         conduct a preliminary examination of the proposed changes.
@@ -236,8 +243,11 @@ class Maestro:
                 i.e. `critico`, where `batch_size` of them will be used to propose changes and the other
                 `batch_size` of them will be used for preliminary examinations.
             sampler (ProportionalSampler): Sampler to use for selecting setups.
-            verbose (bool): If True, related information will be printed during the iterate step.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during the iterate step.
+                Defaults to False.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
+            pbar (tqdm, optional): A progress bar to display the progress of the iteration. Defaults to None.
 
         Returns:
             bool: True if the proposed changes pass the preliminary examination and False otherwise.
@@ -250,23 +260,27 @@ class Maestro:
                 "No setup (simulator, critico) has been added to Maestro. Please add at least one setup before optimization."
             )
 
+        group_id = uuid4().hex if group_id is None else group_id
+
         setups = sampler.sample(batch_size * 2)
         awaitables = []
         criticos = []
         for setup in setups:
             simulator = setup["simulator"]
             critico = setup["critico"]
-            awaitables.append(simulator.run(num_runs=1))
+            awaitables.append(simulator.run(num_runs=1, group_id=group_id))
             criticos.append(critico)
 
         test_cases, agent_logs = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+        if pbar is not None:
+            pbar.update(len(test_cases))
+
         analysis, proposed_values = await self._client.propose_values(
             {
                 "params": params.export(),
                 "serialized_past_proposals": self._serialize_past_proposals(),
                 "test_cases": test_cases[:batch_size],
-                "max_proposals": self.max_proposals,
                 "goal": self.goal,
                 "param_graph": param_graph.export(),
             }
@@ -276,14 +290,13 @@ class Maestro:
         for param, value in proposed_values.items():
             changes.append({"param": param, "previous value": params.__getattr__(param), "new value": value})
             if verbose:
-                print("--------------------------")
-                print("proposed param change:", param)
+                print("=" * 60)
+                print("- proposed param change:", param)
                 print("")
-                print("previous value:", params.__getattr__(param))
+                print("- previous value:\n\n", params.__getattr__(param))
                 print("")
-                print("new value:", value)
-                print("-----------\n")
-                print("--------------------------")
+                print("- new value:\n\n", value)
+                print("=" * 60)
 
         self.log.append({"proposal id": len(self.log), "proposed changes": changes})
 
@@ -297,13 +310,16 @@ class Maestro:
         for test_case, agent_log, setup in zip(test_cases, agent_logs, setups):
             simulator = setup["simulator"]
             critico = setup["critico"]
-            new_awaitables.append(simulator.rerun([agent_log.simulation_tape]))
+            new_awaitables.append(simulator.rerun([agent_log.simulation_tape], group_id=group_id))
             new_criticos.append(critico)
 
         test_cases_updated, _ = await self._evaluate(
             awaitables=new_awaitables, criticos=new_criticos, verbose=verbose, print_flag=" (changed)"
         )
 
+        if pbar is not None:
+            pbar.update(len(test_cases_updated))
+
         for sample_id in range(0, batch_size * 2):
             test_cases_updated[sample_id]["previous_log"] = test_cases[sample_id]["log"]
             test_cases_updated[sample_id]["previous_output"] = test_cases[sample_id]["output"]
@@ -350,24 +366,24 @@ class Maestro:
             print("new avg score: ", new_score)
             print("accepted: ", review_decision["accepted"])
             print("review comment:\n", review_decision["full comment"])
-            print("-------------------------------------------\n\n")
+            print("-" * 60 + "\n\n")
 
         return review_decision["accepted"]
 
     async def optimize_config(
         self,
         total_rollouts: int,
-        batch_size: int = 4,
+        batch_size: int = 8,
         explore_radius: int = 5,
         explore_factor: float = 0.5,
-        verbose: bool = True,
+        verbose: bool = False,
     ):
         """
         Optimize the configs (parameters) of the agent.
 
         Args:
             total_rollouts (int): Total number of rollouts to use for optimization.
-            batch_size (int): Base batch size to use for individual optimization steps. Defaults to 4.
+            batch_size (int): Base batch size to use for individual optimization steps. Defaults to 8.
             explore_radius (int): A positive integer controlling the aggressiveness of exploration during optimization.
                 A larger `explore_radius` encourages the optimizer to make more substantial changes between successive configurations.
                 Defaults to 5.
@@ -376,7 +392,7 @@ class Maestro:
                 while a lower value allocates more rollouts to ensure the discovered configs are thoroughly evaluated.
                 Defaults to 0.5.
             verbose (bool): If True, related information will be printed during the optimization step.
-                Defaults to True.
+                Defaults to False.
 
         Raises:
             ValueError: If the input parameters are not valid.
@@ -390,47 +406,56 @@ class Maestro:
         if explore_factor <= 0 or explore_factor >= 1:
             raise ValueError(f"`explore_factor` must be a float between 0 and 1, got {explore_factor}.")
 
-        # total_rollouts = (iterate_steps * batch_size * 4 + select_steps * batch_size) * num_rounds
-        # explore_factor = (iterate_steps * batch_size * 4) / (iterate_steps * batch_size * 4 + select_steps * batch_size)
+        group_size = (batch_size + 1) // 2
+        # total_rollouts = (iterate_steps * group_size * 4 + select_steps * group_size) * num_rounds
+        # explore_factor = (iterate_steps * group_size * 4) / (iterate_steps * group_size * 4 + select_steps * group_size)
         iterate_steps: int = explore_radius
         select_steps: int = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)
-        num_rounds: int = int(total_rollouts / (iterate_steps * batch_size * 4 + select_steps * batch_size))
-
-        if verbose:
-            print("optimize_config settings:")
-            print("  total_rollouts: ", total_rollouts)
-            print("  batch_size: ", batch_size)
-            print("  explore_radius: ", explore_radius)
-            print("  explore_factor: ", explore_factor)
-            print("-------------------------------------------")
-            print("  iterate_steps: ", iterate_steps)
-            print("  select_steps: ", select_steps)
-            print("  num_rounds: ", num_rounds)
-            print("-------------------------------------------\n\n")
+        num_rounds: int = int(total_rollouts / (iterate_steps * group_size * 4 + select_steps * group_size))
+        total_rollouts = num_rounds * (iterate_steps * group_size * 4 + select_steps * group_size)
+
+        print("optimize_config settings:")
+        print("  total_rollouts: ", total_rollouts)
+        print("  (adjusted) batch_size: ", group_size * 2)
+        print("  explore_radius: ", explore_radius)
+        print("  explore_factor: ", explore_factor)
+        print("-" * 60)
+        print("  iterate_steps: ", iterate_steps)
+        print("  select_steps: ", select_steps)
+        print("  num_rounds: ", num_rounds)
+        print("=" * 80 + "\n\n")
 
         if num_rounds == 0:
             raise ValueError(
                 f"`total_rollouts` is too small for the given `batch_size` {batch_size}, `explore_radius` {explore_radius}, and `explore_factor` {explore_factor}. "
-                f"Please increase `total_rollouts` to at least {iterate_steps * batch_size * 4 + select_steps * batch_size}."
+                f"Please increase `total_rollouts` to at least {iterate_steps * group_size * 4 + select_steps * group_size}."
             )
 
         sampler = ProportionalSampler(
             elements=self.setups,
             weights=[setup["weight"] for setup in self.setups],
         )
+        group_id = "Maestro-Config-" + uuid4().hex
+        pbar = tqdm(total=total_rollouts, desc="Total rollouts consumed for config optimization")
 
         for round in range(num_rounds):
-            if verbose:
-                print(f"================== Round {round + 1}/{num_rounds} ==================")
-                print("Total versions: ", len(self.versions))
-                print("Rebase to version: ", self.current_version)
-                print("Score (current base): ", self.versions[self.current_version]["average_score"])
-                print("Visits (current base): ", self.versions[self.current_version]["visits"])
-                print("Visits (total): ", self.total_visits)
+            print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} begins" + "=" * 30)
+            print("Total versions accepted: ", len(self.versions))
+            print("Rebase to version: ", self.current_version)
+            print(
+                "Score for the current base version: %s based on %s rollouts"
+                % (
+                    self.versions[self.current_version]["average_score"],
+                    self.versions[self.current_version]["visits"] * group_size,
                )
+            )
+            print("\n\n")
 
             new_version = False
             for _ in range(iterate_steps):
-                changes_accepted = await self._iterate(batch_size=batch_size, verbose=verbose, sampler=sampler)
+                changes_accepted = await self._iterate(
+                    batch_size=group_size, verbose=verbose, sampler=sampler, group_id=group_id, pbar=pbar
+                )
                 if changes_accepted:
                     new_version = True
 
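For intuition on the rollout budget introduced above, here is the arithmetic with the new defaults, mirroring the formulas in the hunk (plain Python; the chosen `total_rollouts` value is illustrative):

```python
batch_size, explore_radius, explore_factor = 8, 5, 0.5

group_size = (batch_size + 1) // 2   # 4
iterate_steps = explore_radius       # 5
select_steps = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)  # 20

# One round costs: iterate rollouts (original + rerun, 2 * batch of 2*group_size)
# plus validation rollouts for the select steps.
cost_per_round = iterate_steps * group_size * 4 + select_steps * group_size  # 80 + 80 = 160

# The new code also rounds total_rollouts down to a whole number of rounds:
total_rollouts = 320
num_rounds = int(total_rollouts / cost_per_round)  # 2
total_rollouts = num_rounds * cost_per_round       # 320

print(group_size, iterate_steps, select_steps, cost_per_round, num_rounds)
```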
@@ -453,19 +478,22 @@ class Maestro:
             for _ in range(select_steps):
                 await self._select(explore=True)
 
-            setups = sampler.sample(batch_size)
+            setups = sampler.sample(group_size)
             awaitables = []
             criticos = []
             for setup in setups:
                 simulator = setup["simulator"]
                 critico = setup["critico"]
-                awaitables.append(simulator.run(num_runs=1))
+                awaitables.append(simulator.run(num_runs=1, group_id=group_id))
                 criticos.append(critico)
 
             test_cases_validation, _ = await self._evaluate(
                 awaitables=awaitables, criticos=criticos, verbose=verbose, print_flag="(validation)"
             )
 
+            if pbar is not None:
+                pbar.update(len(test_cases_validation))
+
             validation_score = 0.0
             for test_case in test_cases_validation:
                 validation_score += test_case["eval_score"]
@@ -493,21 +521,26 @@ class Maestro:
 
             # Switch to the current version with highest score
             await self._select(explore=False)
-            if verbose:
-                print("Total versions: ", len(self.versions))
-                print("Best version: ", self.current_version)
-                print("Score (best version): ", self.versions[self.current_version]["average_score"])
-                print("Visits (best version): ", self.versions[self.current_version]["visits"])
-                print("Visits (total): ", self.total_visits)
 
-                print(
-                    "all versions: ",
-                    {
-                        i: {"score": self.versions[i]["average_score"], "visits": self.versions[i]["visits"]}
-                        for i in range(len(self.versions))
-                    },
+            print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} finishes" + "=" * 30)
+            print("Total versions accepted: ", len(self.versions))
+            print("Best version index: ", self.current_version)
+            print(
+                "Score for the best version: %s based on %s rollouts"
+                % (
+                    self.versions[self.current_version]["average_score"],
+                    self.versions[self.current_version]["visits"] * group_size,
                 )
-                print("--------------------")
+            )
+
+            print(
+                "All versions: ",
+                {
+                    i: {"score": self.versions[i]["average_score"], "rollouts evaluated": self.versions[i]["visits"] * group_size}
+                    for i in range(len(self.versions))
+                },
+            )
+            print("--------------------")
 
@@ -543,18 +576,16 @@ class Maestro:
 
             if self.log_to_platform:
                 await sync_to_platform()
-                if verbose:
-                    print(
-                        f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
-                    )
+                print(
+                    f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
+                )
 
     async def optimize_structure(
         self,
         total_rollouts: int,
         description: Optional[str] = None,
         code_paths: Optional[list[str]] = None,
-        name: str = "No Name",
-        verbose: bool = True,
+        verbose: bool = False,
     ) -> str:
         """
         Propose structural changes (i.e. changes that cannot be achieved by setting parameters alone) to
@@ -567,15 +598,17 @@ class Maestro:
             description (str, optional): Text description of the current structure/workflow/... of the agent.
             code_paths (list[str], optional): A list of paths corresponding to code files containing
                 the implementation of the agent.
-            name (str, optional): Name of the graph optimization visualization on RELAI platform.
-                Defaults to "No Name".
-            verbose (bool): If True, related information will be printed during the optimization.
-                Defaults to True.
+            verbose (bool): If True, additional information will be printed during the optimization.
+                Defaults to False.
 
         Returns:
             str: Suggestion for structural changes to the agent.
         """
 
+        print("optimize_structure settings:")
+        print("  total_rollouts: ", total_rollouts)
+        print("=" * 80 + "\n\n")
+
         if code_paths is not None:
             code = extract_code(code_paths=code_paths)
         else:
@@ -585,17 +618,24 @@ class Maestro:
             elements=self.setups,
             weights=[setup["weight"] for setup in self.setups],
         )
+        group_id = "Maestro-Struct-" + uuid4().hex
+
+        print("=" * 80)
+        print("Running the agent to collect traces...\n\n")
+
         setups = sampler.sample(total_rollouts)
         awaitables = []
         criticos = []
         for setup in setups:
             simulator = setup["simulator"]
             critico = setup["critico"]
-            awaitables.append(simulator.run(num_runs=1))
+            awaitables.append(simulator.run(num_runs=1, group_id=group_id))
             criticos.append(critico)
 
         test_cases, _ = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+        print("=" * 80)
+        print("Optimizing structure...\n\n")
         suggestion = await self._client.optimize_structure(
             {
                 "agent_name": get_full_func_name(self.agent_fn),
@@ -611,7 +651,7 @@ class Maestro:
 
         async def sync_to_platform():
             payload = GraphOptVizSchema(
-                name=name,
+                name=self.name,
                 proposal=suggestion,
                 runs=[
                     RunSchema(
@@ -628,12 +668,12 @@ class Maestro:
 
             return await self._client.update_graph_opt_visual(payload)
 
+        print("=" * 40 + "suggestion" + "=" * 40)
+        print(suggestion)
+        print("=" * 90 + "\n\n")
+
         if self.log_to_platform:
             uid = await sync_to_platform()
-            if verbose:
-                print(f"Results uploaded to RELAI platform, visualization id: {uid}")
-
-        if verbose:
-            print("suggestion:\n", suggestion)
+            print(f"Results uploaded to RELAI platform, visualization id: {uid}")
 
         return suggestion
relai/mocker/persona.py CHANGED
@@ -141,16 +141,18 @@ class PersonaSet(Sequence[Persona]):
     A collection of Persona instances loaded from a persona set on the RELAI platform.
     """
 
-    def __init__(self, persona_set_id: str) -> None:
+    def __init__(self, persona_set_id: str, **persona_kwargs: Any) -> None:
        """
        Initializes the PersonaSet with the given persona set ID.

        Args:
            persona_set_id (str): The ID of the persona set on the RELAI platform.
+            **persona_kwargs: Keyword arguments that are forwarded to each Persona created from the set.
        """
        self.persona_set_id = persona_set_id
        self._user_personas = None
        self._personas = None
+        self._persona_kwargs = persona_kwargs
 
    def user_personas(self) -> list[str]:
        if self._user_personas is None:
@@ -161,7 +163,9 @@ class PersonaSet(Sequence[Persona]):
 
    def personas(self) -> list[Persona]:
        if self._personas is None:
-            self._personas = [Persona(user_persona=persona) for persona in self.user_personas()]
+            self._personas = [
+                Persona(user_persona=persona, **self._persona_kwargs) for persona in self.user_personas()
+            ]
        return self._personas
 
    @overload
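The `**persona_kwargs` pass-through means options set once on a `PersonaSet` apply to every `Persona` it creates. A hedged usage sketch: the set ID is hypothetical, and `model` is an assumed keyword; pass whatever keyword arguments your `Persona` version actually accepts:

```python
from relai.mocker.persona import PersonaSet

# "ps-12345" is a hypothetical set ID; `model` is an assumed Persona kwarg.
persona_set = PersonaSet("ps-12345", model="gpt-5-mini")

# Every Persona built from the set now carries the forwarded kwargs.
personas = persona_set.personas()
```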
relai/mocker/tool.py CHANGED
@@ -4,6 +4,7 @@ from uuid import uuid4
 
 from agents import Agent, Runner, SQLiteSession
 
+from ..utils import no_trace
 from .base_mocker import BaseMocker
 
 
@@ -48,12 +49,12 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-        result = Runner.run_sync(
-            self.agent,
-            agent_input,
-            session=self._session,
-        )
+        with no_trace():
+            result = Runner.run_sync(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
 
@@ -64,11 +65,11 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-        result = await Runner.run(
-            self.agent,
-            agent_input,
-            session=self._session,
-        )
+        with no_trace():
+            result = await Runner.run(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
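`MockTool` now wraps its internal LLM calls in `no_trace()` so the mock's own model turns do not pollute the agent's trace. The diff does not show `no_trace` itself; below is a minimal sketch of the general pattern such a helper typically follows (a context manager toggling a module-level flag), not relai's actual implementation:

```python
from contextlib import contextmanager

_TRACING_ENABLED = True  # hypothetical stand-in for the SDK's internal tracing state

@contextmanager
def no_trace():
    # Temporarily disable tracing, restoring the previous state on exit.
    global _TRACING_ENABLED
    previous, _TRACING_ENABLED = _TRACING_ENABLED, False
    try:
        yield
    finally:
        _TRACING_ENABLED = previous
```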
relai/simulator.py CHANGED
@@ -203,15 +203,17 @@ class SyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    def run(self, num_runs: int) -> list[AgentLog]:
+    def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """
         Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -235,16 +237,18 @@ class SyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
@@ -299,14 +303,16 @@ class AsyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    async def run(self, num_runs: int) -> list[AgentLog]:
+    async def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -330,16 +336,18 @@ class AsyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    async def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    async def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
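With `group_id` threaded through `run` and `rerun`, a caller can keep an original run and its tape-for-tape replay under one group. A short sketch, assuming an already configured `AsyncSimulator` instance:

```python
from uuid import uuid4

async def replay_under_one_group(simulator) -> None:
    # Same "Simulate-" prefix convention the SDK uses when no group_id is given.
    group_id = "Simulate-" + uuid4().hex
    agent_logs = await simulator.run(num_runs=4, group_id=group_id)
    # Replays of the same tapes now land in the original runs' group.
    await simulator.rerun([log.simulation_tape for log in agent_logs], group_id=group_id)
```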
relai/utils.py CHANGED
@@ -34,20 +34,57 @@ def create_logging_span(logger_id: str | None = None):
 
 
 def log_model(*args, **kwargs):
+    """
+    Logs a model call event.
+
+    Args:
+        name (str): Name of the model.
+        input (Any): Input to the model.
+        output (Any): Output from the model.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_model(*args, **kwargs)
 
 
 def log_tool(*args, **kwargs):
+    """
+    Logs a tool call event.
+
+    Args:
+        name (str): Name of the tool.
+        input (Any): Input to the tool.
+        output (Any): Output from the tool.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_tool(*args, **kwargs)
 
 
 def log_persona(*args, **kwargs):
+    """
+    Logs a persona activity.
+
+    Args:
+        name (str): Name of the persona.
+        model_name (str): Name of the model.
+        input (Any): Input to the persona.
+        output (Any): Output from the persona.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_persona(*args, **kwargs)
 
 
 def log_router(*args, **kwargs):
+    """
+    Logs a router event.
+
+    Args:
+        name (str): Name of the router.
+        input (Any): Input to the router.
+        output (Any): Output from the router.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_router(*args, **kwargs)
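The helpers documented above log against whatever logger is active in the current span. A hedged usage sketch based on the documented signatures (the argument values are illustrative):

```python
from relai.utils import create_logging_span, log_model, log_tool

with create_logging_span():
    # Keyword names follow the docstrings added above; values are illustrative.
    log_model(name="gpt-5-mini", input="What is AAPL trading at?", output="$258.02", note="stock Q&A")
    log_tool(name="get_stock_price", input={"ticker": "AAPL"}, output=258.02)
```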
relai-0.3.2.dist-info/METADATA → relai-0.3.4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: relai
-Version: 0.3.2
+Version: 0.3.4
 Summary: An SDK for building reliable AI agents
 Author-email: RELAI <priyatham@relai.ai>, RELAI <wwx@relai.ai>
 License: Apache License
@@ -205,12 +205,12 @@ License: Apache License
  See the License for the specific language governing permissions and
  limitations under the License.
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3.9
+Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: pydantic>=2.11.5
@@ -226,14 +226,22 @@ Dynamic: license-file
   <img align="center" src="docs/assets/relai-logo.png" width="460px" />
 </p>
 <p align="left">
-  <h1 align="center">RELAI: Simulate → Evaluate → Optimize AI Agents</h1>
+  <h1 align="center">Simulate → Evaluate → Optimize AI Agents</h1>
+  <p align="center">
+    <a href="https://pypi.org/project/relai/"><img alt="PyPI" src="https://img.shields.io/pypi/v/relai.svg"></a>
+    <img alt="Python" src="https://img.shields.io/pypi/pyversions/relai.svg">
+    <a href="LICENSE.md"><img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue.svg"></a>
+    <a href="http://docs.relai.ai"><img alt="Docs" src="https://img.shields.io/badge/docs-online-brightgreen.svg"></a>
+    <a href="https://github.com/relai-ai/relai-sdk/actions/workflows/upload-to-package-index.yml"><img alt="CI" src="https://img.shields.io/github/actions/workflow/status/relai-ai/relai-sdk/upload-to-package-index.yml?branch=main"></a>
+  </p>
+
 
 **RELAI** is an SDK for building **reliable AI agents**. It streamlines the hardest parts of agent development—**simulation**, **evaluation**, and **optimization**—so you can iterate quickly with confidence.
 
 **What you get**
-- **Agent Simulation** — Create full/partial environments, define **LLM personas**, mock **MCP** servers & tools, and generate **synthetic data**. Optionally **condition simulation on real samples** to better match production.
-- **Agent Evaluation** — Mix **code-based** and **LLM-based** custom evaluators or use **RELAI platform evaluators**. Turn human reviews into **benchmarks** you can re-run.
-- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve prompts/configs **and** suggest **graph-level** changes. Also selects **best model/tool/graph** based on observed performance.
+- **Agent Simulation** — Create full/partial environments, define LLM personas, mock MCP servers & tools, and generate synthetic data. Optionally condition simulation on real samples to better match production.
+- **Agent Evaluation** — Mix code-based and LLM-based custom evaluators or use RELAI platform evaluators. Turn human reviews into benchmarks you can re-run.
+- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve **prompts/configs** and suggest **graph-level** changes. Maestro selects best model/tool/graph based on observed performance.
 
 ## Quickstart
 
@@ -249,10 +257,161 @@ uv add relai
 export RELAI_API_KEY="<RELAI_API_KEY>"
 ```
 
+### Example: A simple Stock Assistant Agent (Simulate → Evaluate → Optimize)
+Prerequisites: an OpenAI API key and `openai-agents` installed to run the base agent.
+To use the Maestro graph optimizer, save the following in a file called `stock-assistant.py` (or change the `code_paths` argument to `maestro.optimize_structure`).
+```python
+# ============================================================================
+# STEP 0 — Prerequisites
+# ============================================================================
+# export OPENAI_API_KEY="sk-..."
+# `uv add openai-agents`
+# export RELAI_API_KEY="relai-..."
+# Save as `stock-assistant.py`
+
+import asyncio
+
+from agents import Agent, Runner
+
+from relai import (
+    AgentOutputs,
+    AsyncRELAI,
+    AsyncSimulator,
+    SimulationTape,
+    random_env_generator,
+)
+from relai.critico import Critico
+from relai.critico.evaluate import RELAIFormatEvaluator
+from relai.maestro import Maestro, params, register_param
+from relai.mocker import Persona
+from relai.simulator import simulated
+
+# ============================================================================
+# STEP 1.1 — Decorate inputs/tools that will be simulated
+# ============================================================================
+
+
+@simulated
+async def get_user_query() -> str:
+    """Get user's query about stock prices."""
+    # In a real agent, this function might get input from a chat interface.
+    return input("Enter your stock query: ")
+
+
+# ============================================================================
+# STEP 1.2 — Register parameters for optimization
+# ============================================================================
+
+register_param(
+    "prompt",
+    type="prompt",
+    init_value="You are a helpful assistant for stock price questions.",
+    desc="system prompt for the agent",
+)
+
+# ============================================================================
+# STEP 2 — Your agent core
+# ============================================================================
+
+
+async def agent_fn(tape: SimulationTape) -> AgentOutputs:
+    # It is good practice to catch exceptions in the agent function,
+    # especially if the agent might raise errors with different configs
+    try:
+        question = await get_user_query()
+        agent = Agent(
+            name="Stock assistant",
+            instructions=params.prompt,  # access registered parameter
+            model="gpt-5-mini",
+        )
+        result = await Runner.run(agent, question)
+        tape.extras["format_rubrics"] = {"Prices must include cents (eg: $XXX.XX)": 1.0}
+        tape.agent_inputs["question"] = question  # trace inputs for later auditing
+        return {"summary": result.final_output}
+    except Exception as e:
+        return {"summary": str(e)}
+
+
+async def main() -> None:
+    # Set up your simulation environment
+    # Bind Personas/MockTools to fully-qualified function names
+    env_generator = random_env_generator(
+        config_set={
+            "__main__.get_user_query": [Persona(user_persona="A polite and curious user.")],
+        }
+    )
+
+    async with AsyncRELAI() as client:
+        # ============================================================================
+        # STEP 3 — Simulate
+        # ============================================================================
+        simulator = AsyncSimulator(agent_fn=agent_fn, env_generator=env_generator, client=client)
+        agent_logs = await simulator.run(num_runs=1)
+
+        # ============================================================================
+        # STEP 4 — Evaluate with Critico
+        # ============================================================================
+        critico = Critico(client=client)
+        format_evaluator = RELAIFormatEvaluator(client=client)
+        critico.add_evaluators({format_evaluator: 1.0})
+        critico_logs = await critico.evaluate(agent_logs)
+
+        # Publish evaluation report to the RELAI platform
+        await critico.report(critico_logs)
+
+        maestro = Maestro(client=client, agent_fn=agent_fn, log_to_platform=True, name="Stock assistant")
+        maestro.add_setup(simulator=simulator, critico=critico)
+
+        # ============================================================================
+        # STEP 5.1 — Optimize configs with Maestro (the parameters registered earlier in STEP 1.2)
+        # ============================================================================
+
+        # params.load("saved_config.json")  # load previous params if available
+        await maestro.optimize_config(
+            total_rollouts=20,  # Total number of rollouts to use for optimization.
+            batch_size=2,  # Base batch size to use for individual optimization steps. Defaults to 8.
+            explore_radius=1,  # A positive integer controlling the aggressiveness of exploration during optimization.
+            explore_factor=0.5,  # A float between 0 and 1 controlling the exploration-exploitation trade-off.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+        params.save("saved_config.json")  # save optimized params for future usage
+
+        # ============================================================================
+        # STEP 5.2 — Optimize agent structure with Maestro (changes that cannot be achieved by setting parameters alone)
+        # ============================================================================
+
+        await maestro.optimize_structure(
+            total_rollouts=10,  # Total number of rollouts to use for optimization.
+            code_paths=["stock-assistant.py"],  # A list of paths corresponding to code implementations of the agent.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
+```
+## Simulation
+Create controlled environments where agents interact and generate traces. Compose LLM personas, mock MCP tools/servers, and synthetic data; optionally condition on real events to align simulation ⇄ production.
+
+➡️ Learn more: [Simulator](https://docs.relai.ai/simulator.html)
+
+## Evaluation (Critico)
+Use code-based or LLM-based evaluators—or RELAI platform evaluators—and convert human reviews into benchmarks you can re-run in Simulation/CI pipelines.
+
+➡️ Learn more: [Evaluator](https://docs.relai.ai/evaluator.html)
+
+## Optimization (Maestro)
+Maestro is a holistic agent optimizer. It consumes evaluator/user feedback to improve prompts, configs, and even graph structure when prompt tuning isn’t enough. It can also select the best model, best tool, and best graph based on observed performance.
+
+➡️ Learn more: [Maestro](https://docs.relai.ai/maestro.html)
+
 ## Links
 
-- 📘 **Documentation:** [docs.relai.ai](#)
+- 📘 **Documentation:** [docs.relai.ai](http://docs.relai.ai)
 - 🧪 **Examples:** [relai-sdk/examples](examples)
+- 📖 **Tutorials:** [docs.relai.ai/tutorials/index.html](https://docs.relai.ai/tutorials/index.html)
 - 🌐 **Website:** [relai.ai](https://relai.ai)
 - 📰 **Maestro Technical Report:** [arXiv](https://arxiv.org/abs/2509.04642)
 - 🌐 **Join the Community:** [Discord](https://discord.gg/sjaHJ34YYE)
@@ -260,3 +419,30 @@ export RELAI_API_KEY="<RELAI_API_KEY>"
 ## License
 
 Apache 2.0
+
+## Citation
+If you use the SDK in your research, please consider citing our work:
+
+```
+@misc{relai_sdk,
+  author = {RELAI, Inc.},
+  title = {relai-sdk},
+  year = {2025},
+  howpublished = {\url{https://github.com/relai-ai/relai-sdk}},
+  note = {GitHub repository},
+  urldate = {2025-10-20}
+}
+
+@misc{wang2025maestrojointgraph,
+  title={Maestro: Joint Graph & Config Optimization for Reliable AI Agents},
+  author={Wenxiao Wang and Priyatham Kattakinda and Soheil Feizi},
+  year={2025},
+  eprint={2509.04642},
+  archivePrefix={arXiv},
+  primaryClass={cs.AI},
+  url={https://arxiv.org/abs/2509.04642},
+}
+```
+
+<p align="center"> <sub>Made with ❤️ by the RELAI team — <a href="https://relai.ai">relai.ai</a> • <a href="https://discord.gg/sjaHJ34YYE">Community</a></sub> </p>
+
relai-0.3.2.dist-info/RECORD → relai-0.3.4.dist-info/RECORD CHANGED
@@ -5,24 +5,24 @@ relai/benchmark.py,sha256=YTd2xu9aKlUcaWdHInV_7U5YroivYMgTk7UE1XMZBN4,15766
 relai/data.py,sha256=ne0H4EQ0B_yxE9fogoovGExuJuwqutSpuhNsl4UmcsU,7852
 relai/exporter.py,sha256=jZxrUjlYCOpRr7gdmbg6-LUL_fXmtMgPp89CgvP5Z7A,1932
 relai/flags.py,sha256=_GrjQg7mZq7BwEIedR6cjWY4grwsryqbKdgyiRr2P7k,1929
-relai/logger.py,sha256=YfS8U4P89iYz4BsV1717ND6JKgOYDO_dN53207tVkLw,18219
-relai/simulator.py,sha256=FqPvKz3nsT-u61t0Y8L8QikG6LOFxrVvhC9NCTMvWgs,15533
-relai/utils.py,sha256=nUmnMAi_2NoYO9u4hhS6D-AG2HG6TymwHpuI8XrND0Y,1385
+relai/logger.py,sha256=j6PdzNkltukWAqBGKAB2qH2p61kS60RwsupDz-gELB4,18358
+relai/simulator.py,sha256=oEC5oLODPo1vLGBaMUdDj0JovZlc595dez931ihDuXk,16465
+relai/utils.py,sha256=va3xz79NTLJiZKaBrS_3Y8dC4M_JEmf8uOwzwFYYqUU,2359
 relai/critico/__init__.py,sha256=c_mDXCVEzsQckDS4ZFOmANo8vB5Vjr1bvyQNimAPVR8,52
 relai/critico/critico.py,sha256=J1ek9v2J5WBnHnZknZEVppIrWGczVHxuRX7ghK6mpXM,7616
 relai/critico/evaluate.py,sha256=Bd-Hlsh2fz2AQ0SINoyqcdpdbWK2t8yrAPHv6UCueFY,31348
 relai/maestro/__init__.py,sha256=NVXy0v7yghGwGbtsPti4gQGtVA3vMgXdpIpiJUesqME,186
 relai/maestro/graph.py,sha256=SyY0rHzes3o5bSqlK66CQDUAeyChUhWJQM3FzJCBvfs,1850
-relai/maestro/optimizer.py,sha256=PABMEFIcHwDSun-d2qBfvDHS7gHw4odkBJISgpkG8b0,28240
+relai/maestro/optimizer.py,sha256=96rFxXN5bNDCSgOOPywbWk5AbbnJ6ncLK_Z2uh66sdU,29413
 relai/maestro/params.py,sha256=-0Dtk23ClHJR6Q-PsaKr-GwUylz0-BIIquJF2eA-p-I,8925
 relai/maestro/utils.py,sha256=WIE3cR8EMDVfAJozEfngh8DfOQdRPZMxxtN-M1cMmxo,7276
 relai/mocker/__init__.py,sha256=JP2xlSG6Szc0tSEiZzCN6UXdE66uy7AmRn-p358xFVM,102
 relai/mocker/base_mocker.py,sha256=BL4WYtdxWHZdKICfo9idW5i5MrkoxJDElcoeGk-jaJM,994
-relai/mocker/persona.py,sha256=VwAjRTIvzZ7AVGZdswuxO5F6tvIP0czrnHEbt7X7O6w,6450
-relai/mocker/tool.py,sha256=wgbmOOTlpVClDMWzfuJfsrNwGI99k9CwzjoaRMLkAyo,2112
+relai/mocker/persona.py,sha256=q2A_lwYrp7H6sKkguMIPl7FQ_6pL4kTaxGBJ1kU2aGA,6678
+relai/mocker/tool.py,sha256=dHXkVcD9D6HMNlBj13V7GTgW_99a_-3tf9rC6iLDFn8,2229
 relai/schema/visual.py,sha256=Y6BP5CHxLU0e7sTfNjgKmG2GD0R9a8rvITusxd-d-UE,2443
-relai-0.3.2.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
-relai-0.3.2.dist-info/METADATA,sha256=q8bEvJMTJ7WDElFls0mXZXWwqYe4grFJsVC3HK-bMEE,15129
-relai-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-relai-0.3.2.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
-relai-0.3.2.dist-info/RECORD,,
+relai-0.3.4.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
+relai-0.3.4.dist-info/METADATA,sha256=VYH3VpOpZUXP7K-EQzu_cfhUrLDBOk5bICMTDs72Z54,23531
+relai-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+relai-0.3.4.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
+relai-0.3.4.dist-info/RECORD,,