relai 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of relai might be problematic.

relai/logger.py CHANGED
@@ -31,8 +31,11 @@ def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, AttributeValue]]:
  yield f"{key}.{sub_key}", sub_value
  elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
  for index, sub_mapping in enumerate(value):
- for sub_key, sub_value in flatten(sub_mapping):
- yield f"{key}.{index}.{sub_key}", sub_value
+ if isinstance(sub_mapping, Mapping):
+ for sub_key, sub_value in flatten(sub_mapping):
+ yield f"{key}.{index}.{sub_key}", sub_value
+ else:
+ yield f"{key}.{index}", sub_mapping
  else:
  if isinstance(value, Enum):
  value = value.value
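What the patched branch does, in a runnable sketch: lists that mix mappings with plain values are now flattened instead of having the plain items passed to a recursive call they cannot satisfy. The `flatten` below is a simplified stand-in (the real function in `relai/logger.py` also handles `Enum` values and other cases not shown); only its mixed-list branch mirrors the hunk above.

```python
from typing import Any, Iterator, Mapping


def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, Any]]:
    # Simplified stand-in for relai.logger.flatten, mirroring the patched list branch.
    for key, value in mapping.items():
        if isinstance(value, Mapping):
            for sub_key, sub_value in flatten(value):
                yield f"{key}.{sub_key}", sub_value
        elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
            for index, sub_mapping in enumerate(value):
                if isinstance(sub_mapping, Mapping):
                    for sub_key, sub_value in flatten(sub_mapping):
                        yield f"{key}.{index}.{sub_key}", sub_value
                else:
                    # New in 0.3.3: plain items inside a mixed list are kept, keyed by index.
                    yield f"{key}.{index}", sub_mapping
        else:
            yield key, value


print(dict(flatten({"a": [{"b": 1}, "plain"], "c": 2})))
# {'a.0.b': 1, 'a.1': 'plain', 'c': 2}
```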
relai/maestro/optimizer.py CHANGED
@@ -4,6 +4,9 @@ import json
  import os
  from datetime import datetime, timezone
  from typing import Any, Awaitable, Optional
+ from uuid import uuid4
+
+ from tqdm.auto import tqdm

  from relai import AsyncRELAI
  from relai.critico.critico import Critico, CriticoLog
@@ -28,7 +31,6 @@ class Maestro:
  agent_fn: AsyncAgent,
  goal: Optional[str] = None,
  max_memory: int = 20,
- max_proposals: int = 3,
  name: str = "No Name",
  log_to_platform: bool = True,
  ):
@@ -40,8 +42,6 @@ class Maestro:
  will be considered as the only goal. Defaults to None.
  max_memory (int, optional): Control the maximum number of previous optimization history visible at each
  optimization step. Defaults to 20.
- max_proposals (int, optional): Control the maximum number of proposals to consider at each optimization step.
- Defaults to 3.
  name (str, optional): Name of the configuration optimization visualization on RELAI platform.
  Defaults to "No Name".
  log_to_platform (bool): Whether to log optimization progress and results on RELAI platform.
@@ -53,7 +53,6 @@ class Maestro:
  self.max_memory: int = max_memory
  self._client: AsyncRELAI = client
  self.goal: str = goal if goal is not None else "Higher scores"
- self.max_proposals: int = max_proposals
  self.log_to_platform: bool = log_to_platform
  self.config_opt_viz_id: str | None = None
  self.name: str = name
@@ -162,7 +161,7 @@ class Maestro:
  return str(agent_outputs)

  async def _evaluate(
- self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = True, print_flag: str = ""
+ self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = False, print_flag: str = ""
  ) -> tuple[list[dict[str, Any]], list[AgentLog]]:
  """
  Run and evaluate the current version of the agent through a set of awaitables.
@@ -170,8 +169,8 @@ class Maestro:
  Args:
  awaitables (list[Awaitable]): A list of awaitables, each representing a run of the agent
  criticos (list[Critico]): A list of Critico objects, each corresponding to an awaitable
- verbose (bool): If True, related information will be printed during evaluation.
- Defaults to True.
+ verbose (bool): If True, additional information will be printed during evaluation.
+ Defaults to False.
  print_flag (str): A string to be put next to the printed info when `verbose` is True.
  Used to distinguish printed info from different types of evaluations.

@@ -215,15 +214,24 @@ class Maestro:

  if verbose:
  for test_case in test_cases:
- print("input:\n", test_case["input"])
- print(f"log{print_flag}:\n", test_case["log"])
- print(f"output{print_flag}:\n", test_case["output"])
- print(f"eval score{print_flag}:\n", test_case["eval_score"])
- print(f"eval feedback{print_flag}:\n", test_case["eval_feedback"])
+ print("=================agent excution result===================")
+ print(f"- input:\n{test_case['input']}\n")
+ print(f"- log{print_flag}:\n{test_case['log']}\n")
+ print(f"- output{print_flag}:\n{test_case['output']}\n")
+ print(f"- eval score{print_flag}:\n{test_case['eval_score']}\n")
+ print(f"- eval feedback{print_flag}:\n{test_case['eval_feedback']}\n")
+ print("=========================================================\n\n")

  return test_cases, agent_logs

- async def _iterate(self, batch_size: int, sampler: ProportionalSampler, verbose: bool = True) -> bool:
+ async def _iterate(
+ self,
+ batch_size: int,
+ sampler: ProportionalSampler,
+ verbose: bool = False,
+ group_id: str | None = None,
+ pbar: tqdm | None = None,
+ ) -> bool:
  """
  An iterate step will propose changes to the current version of the agent and
  conduct a preliminary examination of the proposed changes.
@@ -236,8 +244,11 @@ class Maestro:
  i.e. `critico`, where `batch_size` of them will be used to propose changes and the other
  `batch_size` of them will be used for preliminary examinations.
  sampler (ProportionalSampler): Sampler to use for selecting setups.
- verbose (bool): If True, related information will be printed during the iterate step.
- Defaults to True.
+ verbose (bool): If True, additional information will be printed during the iterate step.
+ Defaults to False.
+ group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+ a new UUID will be generated.
+ pbar (tqdm, optional): A progress bar to display the progress of the iteration. Defaults to None.

  Returns:
  bool: True if the proposed changes pass the preliminary examination and False otherwise.
@@ -250,23 +261,27 @@ class Maestro:
  "No setup (simulator, critico) has been added to Maestro. Please add at least one setup before optimization."
  )

+ group_id = uuid4().hex if group_id is None else group_id
+
  setups = sampler.sample(batch_size * 2)
  awaitables = []
  criticos = []
  for setup in setups:
  simulator = setup["simulator"]
  critico = setup["critico"]
- awaitables.append(simulator.run(num_runs=1))
+ awaitables.append(simulator.run(num_runs=1, group_id=group_id))
  criticos.append(critico)

  test_cases, agent_logs = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)

+ if pbar is not None:
+ pbar.update(len(test_cases))
+
  analysis, proposed_values = await self._client.propose_values(
  {
  "params": params.export(),
  "serialized_past_proposals": self._serialize_past_proposals(),
  "test_cases": test_cases[:batch_size],
- "max_proposals": self.max_proposals,
  "goal": self.goal,
  "param_graph": param_graph.export(),
  }
@@ -276,14 +291,13 @@ class Maestro:
  for param, value in proposed_values.items():
  changes.append({"param": param, "previous value": params.__getattr__(param), "new value": value})
  if verbose:
- print("--------------------------")
- print("proposed param change:", param)
+ print("=" * 60)
+ print("- proposed param change:", param)
  print("")
- print("previous value:", params.__getattr__(param))
+ print("- previous value:\n\n", params.__getattr__(param))
  print("")
- print("new value:", value)
- print("-----------\n")
- print("--------------------------")
+ print("- new value:\n\n", value)
+ print("=" * 60)

  self.log.append({"proposal id": len(self.log), "proposed changes": changes})

@@ -297,13 +311,16 @@ class Maestro:
  for test_case, agent_log, setup in zip(test_cases, agent_logs, setups):
  simulator = setup["simulator"]
  critico = setup["critico"]
- new_awaitables.append(simulator.rerun([agent_log.simulation_tape]))
+ new_awaitables.append(simulator.rerun([agent_log.simulation_tape], group_id=group_id))
  new_criticos.append(critico)

  test_cases_updated, _ = await self._evaluate(
  awaitables=new_awaitables, criticos=new_criticos, verbose=verbose, print_flag=" (changed)"
  )

+ if pbar is not None:
+ pbar.update(len(test_cases_updated))
+
  for sample_id in range(0, batch_size * 2):
  test_cases_updated[sample_id]["previous_log"] = test_cases[sample_id]["log"]
  test_cases_updated[sample_id]["previous_output"] = test_cases[sample_id]["output"]
@@ -350,7 +367,7 @@ class Maestro:
  print("new avg score: ", new_score)
  print("accepted: ", review_decision["accepted"])
  print("review comment:\n", review_decision["full comment"])
- print("-------------------------------------------\n\n")
+ print("-" * 60 + "\n\n")

  return review_decision["accepted"]

@@ -360,7 +377,7 @@ class Maestro:
  batch_size: int = 4,
  explore_radius: int = 5,
  explore_factor: float = 0.5,
- verbose: bool = True,
+ verbose: bool = False,
  ):
  """
  Optimize the configs (parameters) of the agent.
@@ -376,7 +393,7 @@ class Maestro:
  while a lower value allocates more rollouts to ensure the discovered configs are thoroughly evaluated.
  Defaults to 0.5.
  verbose (bool): If True, related information will be printed during the optimization step.
- Defaults to True.
+ Defaults to False.

  Raises:
  ValueError: If the input parameters are not valid.
@@ -395,18 +412,18 @@ class Maestro:
  iterate_steps: int = explore_radius
  select_steps: int = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)
  num_rounds: int = int(total_rollouts / (iterate_steps * batch_size * 4 + select_steps * batch_size))
-
- if verbose:
- print("optimize_config settings:")
- print(" total_rollouts: ", total_rollouts)
- print(" batch_size: ", batch_size)
- print(" explore_radius: ", explore_radius)
- print(" explore_factor: ", explore_factor)
- print("-------------------------------------------")
- print(" iterate_steps: ", iterate_steps)
- print(" select_steps: ", select_steps)
- print(" num_rounds: ", num_rounds)
- print("-------------------------------------------\n\n")
+ total_rollouts = num_rounds * (iterate_steps * batch_size * 4 + select_steps * batch_size)
+
+ print("optimize_config settings:")
+ print(" total_rollouts: ", total_rollouts)
+ print(" batch_size: ", batch_size)
+ print(" explore_radius: ", explore_radius)
+ print(" explore_factor: ", explore_factor)
+ print("-" * 60)
+ print(" iterate_steps: ", iterate_steps)
+ print(" select_steps: ", select_steps)
+ print(" num_rounds: ", num_rounds)
+ print("=" * 80 + "\n\n")

  if num_rounds == 0:
  raise ValueError(
@@ -418,19 +435,23 @@ class Maestro:
  elements=self.setups,
  weights=[setup["weight"] for setup in self.setups],
  )
+ group_id = uuid4().hex
+ pbar = tqdm(total=total_rollouts, desc="Total rollouts consumed for config optimization")

  for round in range(num_rounds):
- if verbose:
- print(f"================== Round {round + 1}/{num_rounds} ==================")
- print("Total versions: ", len(self.versions))
- print("Rebase to version: ", self.current_version)
- print("Score (current base): ", self.versions[self.current_version]["average_score"])
- print("Visits (current base): ", self.versions[self.current_version]["visits"])
- print("Visits (total): ", self.total_visits)
+ print("=" * 30 + f" Round {round + 1}/{num_rounds} begins" + "=" * 30)
+ print("Total versions: ", len(self.versions))
+ print("Rebase to version: ", self.current_version)
+ print("Score (current base): ", self.versions[self.current_version]["average_score"])
+ print("Visits (current base): ", self.versions[self.current_version]["visits"])
+ print("Visits (total): ", self.total_visits)
+ print("\n\n")

  new_version = False
  for _ in range(iterate_steps):
- changes_accepted = await self._iterate(batch_size=batch_size, verbose=verbose, sampler=sampler)
+ changes_accepted = await self._iterate(
+ batch_size=batch_size, verbose=verbose, sampler=sampler, group_id=group_id, pbar=pbar
+ )
  if changes_accepted:
  new_version = True

@@ -459,13 +480,16 @@ class Maestro:
  for setup in setups:
  simulator = setup["simulator"]
  critico = setup["critico"]
- awaitables.append(simulator.run(num_runs=1))
+ awaitables.append(simulator.run(num_runs=1, group_id=group_id))
  criticos.append(critico)

  test_cases_validation, _ = await self._evaluate(
  awaitables=awaitables, criticos=criticos, verbose=verbose, print_flag="(validation)"
  )

+ if pbar is not None:
+ pbar.update(len(test_cases_validation))
+
  validation_score = 0.0
  for test_case in test_cases_validation:
  validation_score += test_case["eval_score"]
@@ -493,21 +517,22 @@ class Maestro:

  # Switch to the current version with highest score
  await self._select(explore=False)
- if verbose:
- print("Total versions: ", len(self.versions))
- print("Best version: ", self.current_version)
- print("Score (best version): ", self.versions[self.current_version]["average_score"])
- print("Visits (best version): ", self.versions[self.current_version]["visits"])
- print("Visits (total): ", self.total_visits)

- print(
- "all versions: ",
- {
- i: {"score": self.versions[i]["average_score"], "visits": self.versions[i]["visits"]}
- for i in range(len(self.versions))
- },
- )
- print("--------------------")
+ print("=" * 30 + f" Round {round + 1}/{num_rounds} finishes" + "=" * 30)
+ print("Total versions: ", len(self.versions))
+ print("Best version: ", self.current_version)
+ print("Score (best version): ", self.versions[self.current_version]["average_score"])
+ print("Visits (best version): ", self.versions[self.current_version]["visits"])
+ print("Visits (total): ", self.total_visits)
+
+ print(
+ "all versions: ",
+ {
+ i: {"score": self.versions[i]["average_score"], "visits": self.versions[i]["visits"]}
+ for i in range(len(self.versions))
+ },
+ )
+ print("--------------------")

  async def sync_to_platform():
  payload = ConfigOptVizSchema(
@@ -543,18 +568,16 @@ class Maestro:

  if self.log_to_platform:
  await sync_to_platform()
- if verbose:
- print(
- f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
- )
+ print(
+ f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
+ )

  async def optimize_structure(
  self,
  total_rollouts: int,
  description: Optional[str] = None,
  code_paths: Optional[list[str]] = None,
- name: str = "No Name",
- verbose: bool = True,
+ verbose: bool = False,
  ) -> str:
  """
  Propose structural changes (i.e. changes that cannot be achieved by setting parameters alone) to
@@ -567,15 +590,17 @@ class Maestro:
  description (str, optional): Text description of the current structure/workflow/... of the agent.
  code_paths (list[str], optional): A list of paths corresponding to code files containing
  the implementation of the agent.
- name (str, optional): Name of the graph optimization visualization on RELAI platform.
- Defaults to "No Name".
- verbose (bool): If True, related information will be printed during the optimization.
- Defaults to True.
+ verbose (bool): If True, additional information will be printed during the optimization.
+ Defaults to False.

  Returns:
  str: Suggestion for structural changes to the agent.
  """

+ print("optimize_structure settings:")
+ print(" total_rollouts: ", total_rollouts)
+ print("-" * 60 + "\n\n")
+
  if code_paths is not None:
  code = extract_code(code_paths=code_paths)
  else:
@@ -585,17 +610,22 @@ class Maestro:
  elements=self.setups,
  weights=[setup["weight"] for setup in self.setups],
  )
+ group_id = uuid4().hex
+
+ print("Running the agent to collect traces...")
+
  setups = sampler.sample(total_rollouts)
  awaitables = []
  criticos = []
  for setup in setups:
  simulator = setup["simulator"]
  critico = setup["critico"]
- awaitables.append(simulator.run(num_runs=1))
+ awaitables.append(simulator.run(num_runs=1, group_id=group_id))
  criticos.append(critico)

  test_cases, _ = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)

+ print("Optimizing structure...")
  suggestion = await self._client.optimize_structure(
  {
  "agent_name": get_full_func_name(self.agent_fn),
@@ -611,7 +641,7 @@ class Maestro:

  async def sync_to_platform():
  payload = GraphOptVizSchema(
- name=name,
+ name=self.name,
  proposal=suggestion,
  runs=[
  RunSchema(
@@ -628,12 +658,12 @@ class Maestro:

  return await self._client.update_graph_opt_visual(payload)

+ print("=" * 40 + "suggestion" + "=" * 40)
+ print(suggestion)
+ print("=" * 90 + "\n\n")
+
  if self.log_to_platform:
  uid = await sync_to_platform()
- if verbose:
- print(f"Results uploaded to RELAI platform, visualization id: {uid}")
-
- if verbose:
- print("suggestion:\n", suggestion)
+ print(f"Results uploaded to RELAI platform, visualization id: {uid}")

  return suggestion
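Taken together, these hunks change how `Maestro` is driven from user code: `max_proposals` is gone from the constructor, `verbose` now defaults to False for both optimizers, and `optimize_structure` names its platform visualization after `Maestro(name=...)` rather than a removed `name=` keyword. A hedged usage sketch under those assumptions follows; `agent_fn`, `simulator`, and `critico` are placeholders for objects you set up yourself, not names defined in this diff.

```python
from relai import AsyncRELAI
from relai.maestro import Maestro


async def tune(agent_fn, simulator, critico) -> str:
    # agent_fn, simulator, and critico are placeholders for your own setup.
    async with AsyncRELAI() as client:
        maestro = Maestro(client=client, agent_fn=agent_fn, name="My agent", log_to_platform=True)
        maestro.add_setup(simulator=simulator, critico=critico)

        # 0.3.3: max_proposals is no longer a constructor argument,
        # and verbose defaults to False for both optimizers.
        await maestro.optimize_config(
            total_rollouts=40, batch_size=4, explore_radius=5, explore_factor=0.5
        )

        # The platform visualization is named after Maestro(name=...);
        # optimize_structure no longer takes its own name= keyword.
        return await maestro.optimize_structure(total_rollouts=10, code_paths=["my_agent.py"])
```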
relai/mocker/persona.py CHANGED
@@ -141,16 +141,18 @@ class PersonaSet(Sequence[Persona]):
  A collection of Persona instances loaded from a persona set on the RELAI platform.
  """

- def __init__(self, persona_set_id: str) -> None:
+ def __init__(self, persona_set_id: str, **persona_kwargs: Any) -> None:
  """
  Initializes the PersonaSet with the given persona set ID.

  Args:
  persona_set_id (str): The ID of the persona set on the RELAI platform.
+ **persona_kwargs: Keyword arguments that are forwarded to each Persona created from the set.
  """
  self.persona_set_id = persona_set_id
  self._user_personas = None
  self._personas = None
+ self._persona_kwargs = persona_kwargs

  def user_personas(self) -> list[str]:
  if self._user_personas is None:
@@ -161,7 +163,9 @@ class PersonaSet(Sequence[Persona]):

  def personas(self) -> list[Persona]:
  if self._personas is None:
- self._personas = [Persona(user_persona=persona) for persona in self.user_personas()]
+ self._personas = [
+ Persona(user_persona=persona, **self._persona_kwargs) for persona in self.user_personas()
+ ]
  return self._personas

  @overload
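A short sketch of the new keyword forwarding: extra keyword arguments passed to `PersonaSet` are stored and handed to every `Persona` built from the platform set. The set ID and the `note=` keyword below are illustrative assumptions only; whatever keyword arguments your `Persona` class actually accepts are what get forwarded.

```python
from relai.mocker.persona import PersonaSet

# "ps_12345" is a placeholder persona-set ID, and note= stands in for any
# keyword argument Persona accepts; PersonaSet simply forwards it in 0.3.3.
persona_set = PersonaSet("ps_12345", note="checkout shoppers")  # hypothetical note= kwarg

for persona in persona_set.personas():
    # Each item was built as Persona(user_persona=<platform text>, note="checkout shoppers").
    print(persona)
```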
relai/simulator.py CHANGED
@@ -203,15 +203,17 @@ class SyncSimulator(BaseSimulator):
  raise ValueError("client must be provided if log_runs is True")
  self.client = client

- def run(self, num_runs: int) -> list[AgentLog]:
+ def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
  """
  Run the simulator for a specified number of times.

  Args:
  num_runs (int): The number of simulation runs to execute.
+ group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+ a new UUID will be generated.
  """
  agent_logs: list[AgentLog] = []
- group_id = uuid4().hex
+ group_id = uuid4().hex if group_id is None else group_id
  tracking_on()
  for tape, config in self.tape_and_config_generator(num_runs):
  with _simulate(config), create_logging_span(tape.id):
@@ -235,16 +237,18 @@ class SyncSimulator(BaseSimulator):
  tracking_off()
  return agent_logs

- def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+ def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
  """
  Rerun the simulator for a list of simulation tapes.

  Args:
  simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
  the agent in an environment identical to a previous run and is useful for debugging and optimization.
+ group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+ a new UUID will be generated.
  """
  agent_logs: list[AgentLog] = []
- group_id = uuid4().hex
+ group_id = uuid4().hex if group_id is None else group_id
  tracking_on()
  for tape in simulation_tapes:
  new_tape = tape.copy()
@@ -299,14 +303,16 @@ class AsyncSimulator(BaseSimulator):
  raise ValueError("client must be provided if log_runs is True")
  self.client = client

- async def run(self, num_runs: int) -> list[AgentLog]:
+ async def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
  """Run the simulator for a specified number of times.

  Args:
  num_runs (int): The number of simulation runs to execute.
+ group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+ a new UUID will be generated.
  """
  agent_logs: list[AgentLog] = []
- group_id = uuid4().hex
+ group_id = uuid4().hex if group_id is None else group_id
  tracking_on()
  for tape, config in self.tape_and_config_generator(num_runs):
  with _simulate(config), create_logging_span(tape.id):
@@ -330,16 +336,18 @@ class AsyncSimulator(BaseSimulator):
  tracking_off()
  return agent_logs

- async def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+ async def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
  """
  Rerun the simulator for a list of simulation tapes.

  Args:
  simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
  the agent in an environment identical to a previous run and is useful for debugging and optimization.
+ group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+ a new UUID will be generated.
  """
  agent_logs: list[AgentLog] = []
- group_id = uuid4().hex
+ group_id = uuid4().hex if group_id is None else group_id
  tracking_on()
  for tape in simulation_tapes:
  new_tape = tape.copy()
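A sketch of the new `group_id` plumbing in practice: a caller can mint one ID and pass it to both `run` and a later `rerun`, so the original rollouts and their replays are grouped together; omitting it keeps the old behavior of generating a fresh UUID per call. `agent_fn`, `env_generator`, and `client` are placeholders for objects set up elsewhere.

```python
from uuid import uuid4

from relai import AsyncSimulator


async def run_and_replay(agent_fn, env_generator, client):
    simulator = AsyncSimulator(agent_fn=agent_fn, env_generator=env_generator, client=client)

    group_id = uuid4().hex  # one ID shared by the original runs and the replays
    agent_logs = await simulator.run(num_runs=3, group_id=group_id)

    # Re-execute the same environments; the shared group_id keeps both batches together.
    tapes = [log.simulation_tape for log in agent_logs]
    return await simulator.rerun(tapes, group_id=group_id)
```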
relai/utils.py CHANGED
@@ -34,20 +34,57 @@ def create_logging_span(logger_id: str | None = None):


  def log_model(*args, **kwargs):
+ """
+ Logs a model call event.
+
+ Args:
+ name (str): Name of the model.
+ input (Any): Input to the model.
+ output (Any): Output from the model.
+ note (Optional[str]): Optional annotation.
+ """
  logger = get_current_logger()
  logger.log_model(*args, **kwargs)


  def log_tool(*args, **kwargs):
+ """
+ Logs a tool call event.
+
+ Args:
+ name (str): Name of the tool.
+ input (Any): Input to the tool.
+ output (Any): Output from the tool.
+ note (Optional[str]): Optional annotation.
+ """
  logger = get_current_logger()
  logger.log_tool(*args, **kwargs)


  def log_persona(*args, **kwargs):
+ """
+ Logs a persona activity.
+
+ Args:
+ name (str): Name of the persona.
+ model_name (str): Name of the model.
+ input (Any): Input to the persona.
+ output (Any): Output from the persona.
+ note (Optional[str]): Optional annotation.
+ """
  logger = get_current_logger()
  logger.log_persona(*args, **kwargs)


  def log_router(*args, **kwargs):
+ """
+ Logs a router event.
+
+ Args:
+ name (str): Name of the router.
+ input (Any): Input to the router.
+ output (Any): Output from the router.
+ note (Optional[str]): Optional annotation.
+ """
  logger = get_current_logger()
  logger.log_router(*args, **kwargs)
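The helpers documented above are thin wrappers that forward to the logger bound to the current span. A hedged sketch of how they might be called from instrumented agent code, with keyword names taken from the new docstrings and all values illustrative:

```python
from relai.utils import log_model, log_tool


def record_quote_lookup(question: str, answer: str, price: float) -> None:
    # Assumes this runs inside a RELAI logging span (e.g. within a simulated agent run);
    # the tool/model names and payloads below are illustrative only.
    log_tool(name="quote_lookup", input={"ticker": "AAPL"}, output={"price": price})
    log_model(name="gpt-5-mini", input=question, output=answer, note="final answer generation")
```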
relai-0.3.2.dist-info/METADATA → relai-0.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: relai
- Version: 0.3.2
+ Version: 0.3.3
  Summary: An SDK for building reliable AI agents
  Author-email: RELAI <priyatham@relai.ai>, RELAI <wwx@relai.ai>
  License: Apache License
@@ -205,12 +205,11 @@ License: Apache License
  See the License for the specific language governing permissions and
  limitations under the License.
  Classifier: License :: OSI Approved :: Apache Software License
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
- Requires-Python: >=3.9
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE.md
  Requires-Dist: pydantic>=2.11.5
@@ -226,14 +225,22 @@ Dynamic: license-file
  <img align="center" src="docs/assets/relai-logo.png" width="460px" />
  </p>
  <p align="left">
- <h1 align="center">RELAI: Simulate → Evaluate → Optimize AI Agents</h1>
+ <h1 align="center">Simulate → Evaluate → Optimize AI Agents</h1>
+ <p align="center">
+ <a href="https://pypi.org/project/relai/"><img alt="PyPI" src="https://img.shields.io/pypi/v/relai.svg"></a>
+ <img alt="Python" src="https://img.shields.io/pypi/pyversions/relai.svg">
+ <a href="LICENSE.md"><img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue.svg"></a>
+ <a href="http://docs.relai.ai"><img alt="Docs" src="https://img.shields.io/badge/docs-online-brightgreen.svg"></a>
+ <a href="https://github.com/relai-ai/relai-sdk/actions/workflows/upload-to-package-index.yml"><img alt="CI" src="https://img.shields.io/github/actions/workflow/status/relai-ai/relai-sdk/upload-to-package-index.yml?branch=main"></a>
+ </p>
+

  **RELAI** is an SDK for building **reliable AI agents**. It streamlines the hardest parts of agent development—**simulation**, **evaluation**, and **optimization**—so you can iterate quickly with confidence.

  **What you get**
- - **Agent Simulation** — Create full/partial environments, define **LLM personas**, mock **MCP** servers & tools, and generate **synthetic data**. Optionally **condition simulation on real samples** to better match production.
- - **Agent Evaluation** — Mix **code-based** and **LLM-based** custom evaluators or use **RELAI platform evaluators**. Turn human reviews into **benchmarks** you can re-run.
- - **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve prompts/configs **and** suggest **graph-level** changes. Also selects **best model/tool/graph** based on observed performance.
+ - **Agent Simulation** — Create full/partial environments, define LLM personas, mock MCP servers & tools, and generate synthetic data. Optionally condition simulation on real samples to better match production.
+ - **Agent Evaluation** — Mix code-based and LLM-based custom evaluators or use RELAI platform evaluators. Turn human reviews into benchmarks you can re-run.
+ - **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve **prompts/configs** and suggest **graph-level** changes. Maestro selects best model/tool/graph based on observed performance.

  ## Quickstart

@@ -249,9 +256,153 @@ uv add relai
  export RELAI_API_KEY="<RELAI_API_KEY>"
  ```

+ ### Example: A simple Stock Assistant Agent (Simulate → Evaluate → Optimize)
+ Prerequisites: Needs an OpenAI API key and `openai-agents` installed to run the base agent.
+ To use Maestro graph optimizer, save the following in a file called `stock-assistant.py` (or change the `code_paths` argument to `maestro.optimize_structure`).
+ ```python
+ # ============================================================================
+ # STEP 0 — Prerequisites
+ # ============================================================================
+ # export OPENAI_API_KEY="sk-..."
+ # `uv add openai-agents`
+ # export RELAI_API_KEY="relai-..."
+ # Save as `stock-assistant.py`
+
+ import asyncio
+
+ from agents import Agent, Runner
+
+ from relai import (
+ AgentOutputs,
+ AsyncRELAI,
+ AsyncSimulator,
+ SimulationTape,
+ random_env_generator,
+ )
+ from relai.critico import Critico
+ from relai.critico.evaluate import RELAIFormatEvaluator
+ from relai.maestro import Maestro, params, register_param
+ from relai.mocker import Persona
+ from relai.simulator import simulated
+
+ # ============================================================================
+ # STEP 1.1 — Decorate inputs/tools that will be simulated
+ # ============================================================================
+
+
+ @simulated
+ async def get_user_query() -> str:
+ """Get user's query about stock prices."""
+ # In a real agent, this function might get input from a chat interface.
+ return input("Enter you stock query: ")
+
+
+ # ============================================================================
+ # STEP 1.2 — Register parameters for optimization
+ # ============================================================================
+
+ register_param(
+ "prompt",
+ type="prompt",
+ init_value="You are a helpful assistant for stock price questions.",
+ desc="system prompt for the agent",
+ )
+
+ # ============================================================================
+ # STEP 2 — Your agent core
+ # ============================================================================
+
+
+ async def agent_fn(tape: SimulationTape) -> AgentOutputs:
+ question = await get_user_query()
+ agent = Agent(
+ name="Stock assistant",
+ instructions=params.prompt, # access registered parameter
+ model="gpt-5-mini",
+ )
+ result = await Runner.run(agent, question)
+ tape.extras["format_rubrics"] = {"Prices must include cents (eg: $XXX.XX)": 1.0}
+ tape.agent_inputs["question"] = question # trace inputs for later auditing
+ return {"summary": result.final_output}
+
+
+ async def main() -> None:
+ # Set up your simulation environment
+ # Bind Personas/MockTools to fully-qualified function names
+ env_generator = random_env_generator(
+ config_set={
+ "__main__.get_user_query": [Persona(user_persona="A polite and curious user.")],
+ }
+ )
+
+ async with AsyncRELAI() as client:
+ # ============================================================================
+ # STEP 3 — Simulate
+ # ============================================================================
+ simulator = AsyncSimulator(agent_fn=agent_fn, env_generator=env_generator, client=client)
+ agent_logs = await simulator.run(num_runs=1)
+
+ # ============================================================================
+ # STEP 4 — Evaluate with Critico
+ # ============================================================================
+ critico = Critico(client=client)
+ format_evaluator = RELAIFormatEvaluator(client=client)
+ critico.add_evaluators({format_evaluator: 1.0})
+ critico_logs = await critico.evaluate(agent_logs)
+
+ # Publish evaluation report to the RELAI platform
+ await critico.report(critico_logs)
+
+ maestro = Maestro(client=client, agent_fn=agent_fn, log_to_platform=True, name="Stock assistant")
+ maestro.add_setup(simulator=simulator, critico=critico)
+
+ # ============================================================================
+ # STEP 5.1 — Optimize configs with Maestro (the parameters registered earlier in STEP 2)
+ # ============================================================================
+
+ # params.load("saved_config.json") # load previous params if available
+ await maestro.optimize_config(
+ total_rollouts=20, # Total number of rollouts to use for optimization.
+ batch_size=1, # Base batch size to use for individual optimization steps. Defaults to 4.
+ explore_radius=1, # A positive integer controlling the aggressiveness of exploration during optimization.
+ explore_factor=0.5, # A float between 0 to 1 controlling the exploration-exploitation trade-off.
+ verbose=True, # If True, related information will be printed during the optimization step.
+ )
+ params.save("saved_config.json") # save optimized params for future usage
+
+ # ============================================================================
+ # STEP 5.2 — Optimize agent structure with Maestro (changes that cannot be achieved by setting parameters alone)
+ # ============================================================================
+
+ await maestro.optimize_structure(
+ total_rollouts=10, # Total number of rollouts to use for optimization.
+ code_paths=["stock-assistant.py"], # A list of paths corresponding to code implementations of the agent.
+ verbose=True, # If True, related information will be printed during the optimization step.
+ )
+
+
+ if __name__ == "__main__":
+ asyncio.run(main())
+
+ ```
+ ## Simulation
+ Create controlled environments where agents interact and generate traces. Compose LLM personas, mock MCP tools/servers, and synthetic data; optionally condition on real events to align simulation ⇄ production.
+
+ ➡️ Learn more: [Simulator](https://docs.relai.ai/simulator.html)
+
+ ## Evaluation (Critico)
+ Use code-based or LLM-based evaluators—or RELAI platform evaluators—and convert human reviews into benchmarks you can re-run in Simuation/CI pipeline.
+
+ ➡️ Learn more: [Evaluator](https://docs.relai.ai/evaluator.html)
+
+ ## Optimization (Maestro)
+ Maestro is a holistic agent optimizer. It consumes evaluator/user feedback to improve prompts, configs, and even graph structure when prompt tuning isn't enough. It can also select the best model, best tool, and best graph based on observed performance.
+ ➡️ Learn more: [Maestro](https://docs.relai.ai/maestro.html)
+
+

  ## Links

- - 📘 **Documentation:** [docs.relai.ai](#)
+ - 📘 **Documentation:** [docs.relai.ai](http://docs.relai.ai)
  - 🧪 **Examples:** [relai-sdk/examples](examples)
  - 🌐 **Website:** [relai.ai](https://relai.ai)
  - 📰 **Maestro Technical Report:** [ArXiV](https://arxiv.org/abs/2509.04642)
@@ -260,3 +411,30 @@ export RELAI_API_KEY="<RELAI_API_KEY>"
  ## License

  Apache 2.0
+
+ ## Citation
+ If you use the SDK in your research, please consider citing our work:
+
+ ```
+ @misc{relai_sdk,
+ author = {RELAI, Inc.,},
+ title = {relai-sdk},
+ year = {2025},
+ howpublished = {\url{https://github.com/relai-ai/relai-sdk}},
+ note = {GitHub repository},
+ urldate = {2025-10-20}
+ }
+
+ @misc{wang2025maestrojointgraph,
+ title={Maestro: Joint Graph & Config Optimization for Reliable AI Agents},
+ author={Wenxiao Wang and Priyatham Kattakinda and Soheil Feizi},
+ year={2025},
+ eprint={2509.04642},
+ archivePrefix={arXiv},
+ primaryClass={cs.AI},
+ url={https://arxiv.org/abs/2509.04642},
+ }
+ ```
+
+ <p align="center"> <sub>Made with ❤️ by the RELAI team — <a href="https://relai.ai">relai.ai</a> • <a href="https://discord.gg/sjaHJ34YYE">Community</a></sub> </p>
+
relai-0.3.2.dist-info/RECORD → relai-0.3.3.dist-info/RECORD CHANGED
@@ -5,24 +5,24 @@ relai/benchmark.py,sha256=YTd2xu9aKlUcaWdHInV_7U5YroivYMgTk7UE1XMZBN4,15766
  relai/data.py,sha256=ne0H4EQ0B_yxE9fogoovGExuJuwqutSpuhNsl4UmcsU,7852
  relai/exporter.py,sha256=jZxrUjlYCOpRr7gdmbg6-LUL_fXmtMgPp89CgvP5Z7A,1932
  relai/flags.py,sha256=_GrjQg7mZq7BwEIedR6cjWY4grwsryqbKdgyiRr2P7k,1929
- relai/logger.py,sha256=YfS8U4P89iYz4BsV1717ND6JKgOYDO_dN53207tVkLw,18219
- relai/simulator.py,sha256=FqPvKz3nsT-u61t0Y8L8QikG6LOFxrVvhC9NCTMvWgs,15533
- relai/utils.py,sha256=nUmnMAi_2NoYO9u4hhS6D-AG2HG6TymwHpuI8XrND0Y,1385
+ relai/logger.py,sha256=j6PdzNkltukWAqBGKAB2qH2p61kS60RwsupDz-gELB4,18358
+ relai/simulator.py,sha256=Ni-RxaCtDfjgYuaYOSRTwVrvxhsX_a76x8Jr1EUEP0M,16401
+ relai/utils.py,sha256=va3xz79NTLJiZKaBrS_3Y8dC4M_JEmf8uOwzwFYYqUU,2359
  relai/critico/__init__.py,sha256=c_mDXCVEzsQckDS4ZFOmANo8vB5Vjr1bvyQNimAPVR8,52
  relai/critico/critico.py,sha256=J1ek9v2J5WBnHnZknZEVppIrWGczVHxuRX7ghK6mpXM,7616
  relai/critico/evaluate.py,sha256=Bd-Hlsh2fz2AQ0SINoyqcdpdbWK2t8yrAPHv6UCueFY,31348
  relai/maestro/__init__.py,sha256=NVXy0v7yghGwGbtsPti4gQGtVA3vMgXdpIpiJUesqME,186
  relai/maestro/graph.py,sha256=SyY0rHzes3o5bSqlK66CQDUAeyChUhWJQM3FzJCBvfs,1850
- relai/maestro/optimizer.py,sha256=PABMEFIcHwDSun-d2qBfvDHS7gHw4odkBJISgpkG8b0,28240
+ relai/maestro/optimizer.py,sha256=y_1hxyf1z6YLUTBJn84aZGmxtDX162CCG5W6eHUDJF0,29100
  relai/maestro/params.py,sha256=-0Dtk23ClHJR6Q-PsaKr-GwUylz0-BIIquJF2eA-p-I,8925
  relai/maestro/utils.py,sha256=WIE3cR8EMDVfAJozEfngh8DfOQdRPZMxxtN-M1cMmxo,7276
  relai/mocker/__init__.py,sha256=JP2xlSG6Szc0tSEiZzCN6UXdE66uy7AmRn-p358xFVM,102
  relai/mocker/base_mocker.py,sha256=BL4WYtdxWHZdKICfo9idW5i5MrkoxJDElcoeGk-jaJM,994
- relai/mocker/persona.py,sha256=VwAjRTIvzZ7AVGZdswuxO5F6tvIP0czrnHEbt7X7O6w,6450
+ relai/mocker/persona.py,sha256=q2A_lwYrp7H6sKkguMIPl7FQ_6pL4kTaxGBJ1kU2aGA,6678
  relai/mocker/tool.py,sha256=wgbmOOTlpVClDMWzfuJfsrNwGI99k9CwzjoaRMLkAyo,2112
  relai/schema/visual.py,sha256=Y6BP5CHxLU0e7sTfNjgKmG2GD0R9a8rvITusxd-d-UE,2443
- relai-0.3.2.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
- relai-0.3.2.dist-info/METADATA,sha256=q8bEvJMTJ7WDElFls0mXZXWwqYe4grFJsVC3HK-bMEE,15129
- relai-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- relai-0.3.2.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
- relai-0.3.2.dist-info/RECORD,,
+ relai-0.3.3.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
+ relai-0.3.3.dist-info/METADATA,sha256=fhqMNABGwuXAYOUL8vYyEjtD937SfTmJOObwktBrM5M,23125
+ relai-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ relai-0.3.3.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
+ relai-0.3.3.dist-info/RECORD,,