relai 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relai/logger.py +5 -2
- relai/maestro/optimizer.py +127 -87
- relai/mocker/persona.py +6 -2
- relai/mocker/tool.py +13 -12
- relai/simulator.py +16 -8
- relai/utils.py +37 -0
- {relai-0.3.2.dist-info → relai-0.3.4.dist-info}/METADATA +194 -8
- {relai-0.3.2.dist-info → relai-0.3.4.dist-info}/RECORD +11 -11
- {relai-0.3.2.dist-info → relai-0.3.4.dist-info}/WHEEL +0 -0
- {relai-0.3.2.dist-info → relai-0.3.4.dist-info}/licenses/LICENSE.md +0 -0
- {relai-0.3.2.dist-info → relai-0.3.4.dist-info}/top_level.txt +0 -0
relai/logger.py
CHANGED
@@ -31,8 +31,11 @@ def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, AttributeValue]]:
                 yield f"{key}.{sub_key}", sub_value
         elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
             for index, sub_mapping in enumerate(value):
-
-
+                if isinstance(sub_mapping, Mapping):
+                    for sub_key, sub_value in flatten(sub_mapping):
+                        yield f"{key}.{index}.{sub_key}", sub_value
+                else:
+                    yield f"{key}.{index}", sub_mapping
         else:
             if isinstance(value, Enum):
                 value = value.value
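The patched `flatten` now handles lists that mix mappings and scalar items: nested mappings are flattened recursively under a `key.index.sub_key` path, while scalar items are emitted as `key.index`. A minimal standalone sketch of the patched branch (the surrounding function body is approximated from context, not copied from the package):

```python
from collections.abc import Mapping
from enum import Enum
from typing import Any, Iterator


def flatten(mapping: Mapping[str, Any]) -> Iterator[tuple[str, Any]]:
    # Approximation of relai.logger.flatten, reduced to the branches visible in this hunk.
    for key, value in mapping.items():
        if isinstance(value, Mapping):
            for sub_key, sub_value in flatten(value):
                yield f"{key}.{sub_key}", sub_value
        elif isinstance(value, list) and any(isinstance(item, Mapping) for item in value):
            for index, sub_mapping in enumerate(value):
                if isinstance(sub_mapping, Mapping):
                    # Mappings inside the list are flattened recursively.
                    for sub_key, sub_value in flatten(sub_mapping):
                        yield f"{key}.{index}.{sub_key}", sub_value
                else:
                    # New in 0.3.4: scalar items in a mixed list get their own key instead of breaking.
                    yield f"{key}.{index}", sub_mapping
        else:
            if isinstance(value, Enum):
                value = value.value
            yield key, value


print(dict(flatten({"steps": [{"tool": "search"}, "done"]})))
# -> {'steps.0.tool': 'search', 'steps.1': 'done'}
```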
relai/maestro/optimizer.py
CHANGED
@@ -4,6 +4,9 @@ import json
 import os
 from datetime import datetime, timezone
 from typing import Any, Awaitable, Optional
+from uuid import uuid4
+
+from tqdm.auto import tqdm
 
 from relai import AsyncRELAI
 from relai.critico.critico import Critico, CriticoLog
@@ -28,7 +31,6 @@ class Maestro:
         agent_fn: AsyncAgent,
         goal: Optional[str] = None,
         max_memory: int = 20,
-        max_proposals: int = 3,
         name: str = "No Name",
         log_to_platform: bool = True,
     ):
@@ -40,8 +42,6 @@ class Maestro:
                 will be considered as the only goal. Defaults to None.
             max_memory (int, optional): Control the maximum number of previous optimization history visible at each
                 optimization step. Defaults to 20.
-            max_proposals (int, optional): Control the maximum number of proposals to consider at each optimization step.
-                Defaults to 3.
             name (str, optional): Name of the configuration optimization visualization on RELAI platform.
                 Defaults to "No Name".
             log_to_platform (bool): Whether to log optimization progress and results on RELAI platform.
@@ -53,7 +53,6 @@ class Maestro:
         self.max_memory: int = max_memory
         self._client: AsyncRELAI = client
         self.goal: str = goal if goal is not None else "Higher scores"
-        self.max_proposals: int = max_proposals
         self.log_to_platform: bool = log_to_platform
         self.config_opt_viz_id: str | None = None
         self.name: str = name
@@ -135,8 +134,7 @@ class Maestro:
         """
         self.total_visits += 1
         self.versions[self.current_version]["average_score"] = (
-            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"]
-            + score
+            self.versions[self.current_version]["average_score"] * self.versions[self.current_version]["visits"] + score
         ) / (self.versions[self.current_version]["visits"] + 1.0)
         self.versions[self.current_version]["visits"] += 1
 
@@ -162,7 +160,7 @@ class Maestro:
         return str(agent_outputs)
 
     async def _evaluate(
-        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool =
+        self, awaitables: list[Awaitable], criticos: list[Critico], verbose: bool = False, print_flag: str = ""
     ) -> tuple[list[dict[str, Any]], list[AgentLog]]:
         """
         Run and evaluate the current version of the agent through a set of awaitables.
@@ -170,8 +168,8 @@ class Maestro:
         Args:
             awaitables (list[Awaitable]): A list of awaitables, each representing a run of the agent
             criticos (list[Critico]): A list of Critico objects, each corresponding to an awaitable
-            verbose (bool): If True,
-                Defaults to
+            verbose (bool): If True, additional information will be printed during evaluation.
+                Defaults to False.
             print_flag (str): A string to be put next to the printed info when `verbose` is True.
                 Used to distinguish printed info from different types of evaluations.
 
@@ -215,15 +213,24 @@ class Maestro:
 
         if verbose:
             for test_case in test_cases:
-                print("
-                print(f"
-                print(f"
-                print(f"
-                print(f"eval
+                print("=================agent excution result===================")
+                print(f"- input:\n{test_case['input']}\n")
+                print(f"- log{print_flag}:\n{test_case['log']}\n")
+                print(f"- output{print_flag}:\n{test_case['output']}\n")
+                print(f"- eval score{print_flag}:\n{test_case['eval_score']}\n")
+                print(f"- eval feedback{print_flag}:\n{test_case['eval_feedback']}\n")
+                print("=========================================================\n\n")
 
         return test_cases, agent_logs
 
-    async def _iterate(
+    async def _iterate(
+        self,
+        batch_size: int,
+        sampler: ProportionalSampler,
+        verbose: bool = False,
+        group_id: str | None = None,
+        pbar: tqdm | None = None,
+    ) -> bool:
         """
         An iterate step will propose changes to the current version of the agent and
         conduct a preliminary examination of the proposed changes.
@@ -236,8 +243,11 @@ class Maestro:
                 i.e. `critico`, where `batch_size` of them will be used to propose changes and the other
                 `batch_size` of them will be used for preliminary examinations.
             sampler (ProportionalSampler): Sampler to use for selecting setups.
-            verbose (bool): If True,
-                Defaults to
+            verbose (bool): If True, additional information will be printed during the iterate step.
+                Defaults to False.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
+            pbar (tqdm, optional): A progress bar to display the progress of the iteration. Defaults to None.
 
         Returns:
             bool: True if the proposed changes pass the preliminary examination and False otherwise.
@@ -250,23 +260,27 @@ class Maestro:
                 "No setup (simulator, critico) has been added to Maestro. Please add at least one setup before optimization."
             )
 
+        group_id = uuid4().hex if group_id is None else group_id
+
        setups = sampler.sample(batch_size * 2)
        awaitables = []
        criticos = []
        for setup in setups:
            simulator = setup["simulator"]
            critico = setup["critico"]
-           awaitables.append(simulator.run(num_runs=1))
+           awaitables.append(simulator.run(num_runs=1, group_id=group_id))
            criticos.append(critico)
 
        test_cases, agent_logs = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+       if pbar is not None:
+           pbar.update(len(test_cases))
+
        analysis, proposed_values = await self._client.propose_values(
            {
                "params": params.export(),
                "serialized_past_proposals": self._serialize_past_proposals(),
                "test_cases": test_cases[:batch_size],
-               "max_proposals": self.max_proposals,
                "goal": self.goal,
                "param_graph": param_graph.export(),
            }
@@ -276,14 +290,13 @@ class Maestro:
        for param, value in proposed_values.items():
            changes.append({"param": param, "previous value": params.__getattr__(param), "new value": value})
            if verbose:
-               print("
-               print("proposed param change:", param)
+               print("=" * 60)
+               print("- proposed param change:", param)
                print("")
-               print("previous value
+               print("- previous value:\n\n", params.__getattr__(param))
                print("")
-               print("new value
-               print("
-               print("--------------------------")
+               print("- new value:\n\n", value)
+               print("=" * 60)
 
        self.log.append({"proposal id": len(self.log), "proposed changes": changes})
 
@@ -297,13 +310,16 @@ class Maestro:
        for test_case, agent_log, setup in zip(test_cases, agent_logs, setups):
            simulator = setup["simulator"]
            critico = setup["critico"]
-           new_awaitables.append(simulator.rerun([agent_log.simulation_tape]))
+           new_awaitables.append(simulator.rerun([agent_log.simulation_tape], group_id=group_id))
            new_criticos.append(critico)
 
        test_cases_updated, _ = await self._evaluate(
            awaitables=new_awaitables, criticos=new_criticos, verbose=verbose, print_flag=" (changed)"
        )
 
+       if pbar is not None:
+           pbar.update(len(test_cases_updated))
+
        for sample_id in range(0, batch_size * 2):
            test_cases_updated[sample_id]["previous_log"] = test_cases[sample_id]["log"]
            test_cases_updated[sample_id]["previous_output"] = test_cases[sample_id]["output"]
@@ -350,24 +366,24 @@ class Maestro:
            print("new avg score: ", new_score)
            print("accepted: ", review_decision["accepted"])
            print("review comment:\n", review_decision["full comment"])
-           print("
+           print("-" * 60 + "\n\n")
 
        return review_decision["accepted"]
 
    async def optimize_config(
        self,
        total_rollouts: int,
-       batch_size: int =
+       batch_size: int = 8,
        explore_radius: int = 5,
        explore_factor: float = 0.5,
-       verbose: bool =
+       verbose: bool = False,
    ):
        """
        Optimize the configs (parameters) of the agent.
 
        Args:
            total_rollouts (int): Total number of rollouts to use for optimization.
-           batch_size (int): Base batch size to use for individual optimization steps. Defaults to
+           batch_size (int): Base batch size to use for individual optimization steps. Defaults to 8.
            explore_radius (int): A positive integer controlling the aggressiveness of exploration during optimization.
                A larger `explore_radius` encourages the optimizer to make more substantial changes between successive configurations.
                Defaults to 5.
@@ -376,7 +392,7 @@ class Maestro:
                while a lower value allocates more rollouts to ensure the discovered configs are thoroughly evaluated.
                Defaults to 0.5.
            verbose (bool): If True, related information will be printed during the optimization step.
-               Defaults to
+               Defaults to False.
 
        Raises:
            ValueError: If the input parameters are not valid.
@@ -390,47 +406,56 @@ class Maestro:
        if explore_factor <= 0 or explore_factor >= 1:
            raise ValueError(f"`explore_factor` must be a float between 0 and 1, got {explore_factor}.")
 
-
-       #
+       group_size = (batch_size + 1) // 2
+       # total_rollouts = (iterate_steps * group_size * 4 + select_steps * group_size) * num_rounds
+       # explore_factor = (iterate_steps * group_size * 4) / (iterate_steps * group_size * 4 + select_steps * group_size)
        iterate_steps: int = explore_radius
        select_steps: int = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)
-       num_rounds: int = int(total_rollouts / (iterate_steps *
-
-
-
-
-
-
-
-
-
-
-
-
+       num_rounds: int = int(total_rollouts / (iterate_steps * group_size * 4 + select_steps * group_size))
+       total_rollouts = num_rounds * (iterate_steps * group_size * 4 + select_steps * group_size)
+
+       print("optimize_config settings:")
+       print("    total_rollouts: ", total_rollouts)
+       print("    (adjusted) batch_size: ", group_size * 2)
+       print("    explore_radius: ", explore_radius)
+       print("    explore_factor: ", explore_factor)
+       print("-" * 60)
+       print("    iterate_steps: ", iterate_steps)
+       print("    select_steps: ", select_steps)
+       print("    num_rounds: ", num_rounds)
+       print("=" * 80 + "\n\n")
 
        if num_rounds == 0:
            raise ValueError(
                f"`total_rollouts` is too small for the given `batch_size` {batch_size}, `explore_radius` {explore_radius}, and `explore_factor` {explore_factor}. "
-               f"Please increase `total_rollouts` to at least {iterate_steps *
+               f"Please increase `total_rollouts` to at least {iterate_steps * group_size * 4 + select_steps * group_size}."
            )
 
        sampler = ProportionalSampler(
            elements=self.setups,
            weights=[setup["weight"] for setup in self.setups],
        )
+       group_id = "Maestro-Config-" + uuid4().hex
+       pbar = tqdm(total=total_rollouts, desc="Total rollouts consumed for config optimization")
 
        for round in range(num_rounds):
-
-
-
-
-
-
-
+           print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} begins" + "=" * 30)
+           print("Total versions accepted: ", len(self.versions))
+           print("Rebase to version: ", self.current_version)
+           print(
+               "Score for the current base version: %s based on %s rollouts"
+               % (
+                   self.versions[self.current_version]["average_score"],
+                   self.versions[self.current_version]["visits"] * group_size,
+               )
+           )
+           print("\n\n")
 
            new_version = False
            for _ in range(iterate_steps):
-               changes_accepted = await self._iterate(
+               changes_accepted = await self._iterate(
+                   batch_size=group_size, verbose=verbose, sampler=sampler, group_id=group_id, pbar=pbar
+               )
                if changes_accepted:
                    new_version = True
 
@@ -453,19 +478,22 @@ class Maestro:
            for _ in range(select_steps):
                await self._select(explore=True)
 
-               setups = sampler.sample(
+               setups = sampler.sample(group_size)
                awaitables = []
                criticos = []
                for setup in setups:
                    simulator = setup["simulator"]
                    critico = setup["critico"]
-                   awaitables.append(simulator.run(num_runs=1))
+                   awaitables.append(simulator.run(num_runs=1, group_id=group_id))
                    criticos.append(critico)
 
                test_cases_validation, _ = await self._evaluate(
                    awaitables=awaitables, criticos=criticos, verbose=verbose, print_flag="(validation)"
                )
 
+               if pbar is not None:
+                   pbar.update(len(test_cases_validation))
+
                validation_score = 0.0
                for test_case in test_cases_validation:
                    validation_score += test_case["eval_score"]
@@ -493,21 +521,26 @@ class Maestro:
 
            # Switch to the current version with highest score
            await self._select(explore=False)
-           if verbose:
-               print("Total versions: ", len(self.versions))
-               print("Best version: ", self.current_version)
-               print("Score (best version): ", self.versions[self.current_version]["average_score"])
-               print("Visits (best version): ", self.versions[self.current_version]["visits"])
-               print("Visits (total): ", self.total_visits)
 
-
-
-
-
-
-
+           print("\n\n" + "=" * 30 + f" Round {round + 1}/{num_rounds} finishes" + "=" * 30)
+           print("Total versions accepted: ", len(self.versions))
+           print("Best version index: ", self.current_version)
+           print(
+               "Score for the best version: %s based on %s rollouts"
+               % (
+                   self.versions[self.current_version]["average_score"],
+                   self.versions[self.current_version]["visits"] * group_size,
               )
-
+           )
+
+           print(
+               "All versions: ",
+               {
+                   i: {"score": self.versions[i]["average_score"], "rollouts evaluated": self.versions[i]["visits"] * group_size}
+                   for i in range(len(self.versions))
+               },
+           )
+           print("--------------------")
 
            async def sync_to_platform():
                payload = ConfigOptVizSchema(
@@ -543,18 +576,16 @@ class Maestro:
 
            if self.log_to_platform:
                await sync_to_platform()
-
-
-
-               )
+               print(
+                   f"Results of round {round + 1}/{num_rounds} uploaded to RELAI platform, visualization id: {self.config_opt_viz_id}"
+               )
 
    async def optimize_structure(
        self,
        total_rollouts: int,
        description: Optional[str] = None,
        code_paths: Optional[list[str]] = None,
-
-       verbose: bool = True,
+       verbose: bool = False,
    ) -> str:
        """
        Propose structural changes (i.e. changes that cannot be achieved by setting parameters alone) to
@@ -567,15 +598,17 @@ class Maestro:
            description (str, optional): Text description of the current structure/workflow/... of the agent.
            code_paths (list[str], optional): A list of paths corresponding to code files containing
                the implementation of the agent.
-
-               Defaults to
-           verbose (bool): If True, related information will be printed during the optimization.
-               Defaults to True.
+           verbose (bool): If True, additional information will be printed during the optimization.
+               Defaults to False.
 
        Returns:
            str: Suggestion for structural changes to the agent.
        """
 
+       print("optimize_structure settings:")
+       print("    total_rollouts: ", total_rollouts)
+       print("=" * 80 + "\n\n")
+
        if code_paths is not None:
            code = extract_code(code_paths=code_paths)
        else:
@@ -585,17 +618,24 @@ class Maestro:
            elements=self.setups,
            weights=[setup["weight"] for setup in self.setups],
        )
+       group_id = "Maestro-Struct-" + uuid4().hex
+
+       print("=" * 80)
+       print("Running the agent to collect traces...\n\n")
+
        setups = sampler.sample(total_rollouts)
        awaitables = []
        criticos = []
        for setup in setups:
            simulator = setup["simulator"]
            critico = setup["critico"]
-           awaitables.append(simulator.run(num_runs=1))
+           awaitables.append(simulator.run(num_runs=1, group_id=group_id))
            criticos.append(critico)
 
        test_cases, _ = await self._evaluate(awaitables=awaitables, criticos=criticos, verbose=verbose)
 
+       print("=" * 80)
+       print("Optimizing structure...\n\n")
        suggestion = await self._client.optimize_structure(
            {
                "agent_name": get_full_func_name(self.agent_fn),
@@ -611,7 +651,7 @@ class Maestro:
 
        async def sync_to_platform():
            payload = GraphOptVizSchema(
-               name=name,
+               name=self.name,
                proposal=suggestion,
                runs=[
                    RunSchema(
@@ -628,12 +668,12 @@ class Maestro:
 
            return await self._client.update_graph_opt_visual(payload)
 
+       print("=" * 40 + "suggestion" + "=" * 40)
+       print(suggestion)
+       print("=" * 90 + "\n\n")
+
        if self.log_to_platform:
            uid = await sync_to_platform()
-
-           print(f"Results uploaded to RELAI platform, visualization id: {uid}")
-
-           if verbose:
-               print("suggestion:\n", suggestion)
+           print(f"Results uploaded to RELAI platform, visualization id: {uid}")
 
        return suggestion
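The rewritten `optimize_config` derives its schedule from `batch_size`, `explore_radius`, and `explore_factor` via the `group_size` arithmetic shown above, and the new error message asks for at least `iterate_steps * group_size * 4 + select_steps * group_size` rollouts. A small worked example of that arithmetic with the new defaults (illustration only, mirroring the diffed formulas):

```python
# Mirrors the budget formulas introduced in optimize_config (illustration only).
batch_size = 8        # new default in 0.3.4
explore_radius = 5    # default
explore_factor = 0.5  # default

group_size = (batch_size + 1) // 2                                               # 4
iterate_steps = explore_radius                                                   # 5
select_steps = int(explore_radius * 4 * (1 - explore_factor) / explore_factor)   # 20
rollouts_per_round = iterate_steps * group_size * 4 + select_steps * group_size  # 80 + 80 = 160

total_rollouts = 500
num_rounds = int(total_rollouts / rollouts_per_round)  # 3
adjusted_total = num_rounds * rollouts_per_round       # 480 rollouts actually consumed

print(rollouts_per_round, num_rounds, adjusted_total)  # 160 3 480
```

With the defaults, anything below 160 total rollouts triggers the new `ValueError`, since `num_rounds` would round down to zero.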
relai/mocker/persona.py
CHANGED
@@ -141,16 +141,18 @@ class PersonaSet(Sequence[Persona]):
     A collection of Persona instances loaded from a persona set on the RELAI platform.
     """
 
-    def __init__(self, persona_set_id: str) -> None:
+    def __init__(self, persona_set_id: str, **persona_kwargs: Any) -> None:
         """
         Initializes the PersonaSet with the given persona set ID.
 
         Args:
             persona_set_id (str): The ID of the persona set on the RELAI platform.
+            **persona_kwargs: Keyword arguments that are forwarded to each Persona created from the set.
         """
         self.persona_set_id = persona_set_id
         self._user_personas = None
         self._personas = None
+        self._persona_kwargs = persona_kwargs
 
     def user_personas(self) -> list[str]:
         if self._user_personas is None:
@@ -161,7 +163,9 @@ class PersonaSet(Sequence[Persona]):
 
     def personas(self) -> list[Persona]:
         if self._personas is None:
-            self._personas = [
+            self._personas = [
+                Persona(user_persona=persona, **self._persona_kwargs) for persona in self.user_personas()
+            ]
         return self._personas
 
     @overload
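The new `**persona_kwargs` parameter is stored once on the `PersonaSet` and forwarded to every `Persona` built in `personas()`. A self-contained sketch of the forwarding pattern, using stand-in classes rather than the real RELAI ones (the extra keyword shown is hypothetical; only `user_persona` is confirmed by this diff):

```python
from typing import Any


class Persona:
    """Stand-in for relai.mocker.Persona, only to illustrate the forwarding."""

    def __init__(self, user_persona: str, **kwargs: Any) -> None:
        self.user_persona = user_persona
        self.extra = kwargs


class PersonaSet:
    """Stand-in mirroring the diffed behavior: kwargs are stored once, then reused."""

    def __init__(self, persona_set_id: str, **persona_kwargs: Any) -> None:
        self.persona_set_id = persona_set_id
        self._persona_kwargs = persona_kwargs

    def personas(self) -> list[Persona]:
        user_personas = ["A patient beginner", "An impatient expert"]  # normally fetched from the platform
        return [Persona(user_persona=p, **self._persona_kwargs) for p in user_personas]


personas = PersonaSet("demo-set", note="checkout flow").personas()  # `note` is a made-up kwarg
print(personas[0].extra)  # {'note': 'checkout flow'}
```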
relai/mocker/tool.py
CHANGED
@@ -4,6 +4,7 @@ from uuid import uuid4
 
 from agents import Agent, Runner, SQLiteSession
 
+from ..utils import no_trace
 from .base_mocker import BaseMocker
 
 
@@ -48,12 +49,12 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-
-
-
-
-
+        with no_trace():
+            result = Runner.run_sync(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
 
@@ -64,11 +65,11 @@ class MockTool(BaseMocker):
                 "kwargs": kwargs,
             }
         )
-
-
-
-
-
-
+        with no_trace():
+            result = await Runner.run(
+                self.agent,
+                agent_input,
+                session=self._session,
+            )
         output = result.final_output
         return output
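Both the sync and async `MockTool` paths now wrap the backing `agents` Runner call in `no_trace()` from `relai.utils`. Judging by the name, this should keep the mock tool's own LLM call out of the recorded trace; the sketch below shows the same usage pattern in isolation (prerequisites: `openai-agents` installed and an OpenAI API key set; the suppression behavior itself is an assumption, not confirmed by this diff):

```python
from agents import Agent, Runner

from relai.utils import no_trace

# A stand-in agent playing the role of MockTool's internal `self.agent`.
mock_backend = Agent(
    name="mock-tool-backend",
    instructions="Reply with a plausible output for the mocked tool.",
)

with no_trace():
    # The Runner call executes normally; only its tracing side effects are expected to be suppressed.
    result = Runner.run_sync(mock_backend, "ping")

print(result.final_output)
```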
relai/simulator.py
CHANGED
@@ -203,15 +203,17 @@ class SyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    def run(self, num_runs: int) -> list[AgentLog]:
+    def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """
         Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -235,16 +237,18 @@ class SyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
@@ -299,14 +303,16 @@ class AsyncSimulator(BaseSimulator):
             raise ValueError("client must be provided if log_runs is True")
         self.client = client
 
-    async def run(self, num_runs: int) -> list[AgentLog]:
+    async def run(self, num_runs: int, group_id: str | None = None) -> list[AgentLog]:
         """Run the simulator for a specified number of times.
 
         Args:
             num_runs (int): The number of simulation runs to execute.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape, config in self.tape_and_config_generator(num_runs):
             with _simulate(config), create_logging_span(tape.id):
@@ -330,16 +336,18 @@ class AsyncSimulator(BaseSimulator):
         tracking_off()
         return agent_logs
 
-    async def rerun(self, simulation_tapes: list[SimulationTape]) -> list[AgentLog]:
+    async def rerun(self, simulation_tapes: list[SimulationTape], group_id: str | None = None) -> list[AgentLog]:
         """
         Rerun the simulator for a list of simulation tapes.
 
         Args:
             simulation_tapes (list[SimulationTape]): The list of simulation tapes to rerun. This allows for re-executing
                 the agent in an environment identical to a previous run and is useful for debugging and optimization.
+            group_id (str, optional): An optional group ID to associate all runs together. If not provided,
+                a new UUID will be generated.
         """
         agent_logs: list[AgentLog] = []
-        group_id = uuid4().hex
+        group_id = ("Simulate-" + uuid4().hex) if group_id is None else group_id
         tracking_on()
         for tape in simulation_tapes:
             new_tape = tape.copy()
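`run` and `rerun` now accept an optional `group_id` so that related simulations can be grouped under one identifier, which is how Maestro ties its rollouts together above. A minimal sketch of sharing one ID across a run and its replay (assumes an already-constructed `AsyncSimulator`; the ID prefix is just a convention):

```python
from uuid import uuid4

from relai import AsyncSimulator


async def grouped_runs(simulator: AsyncSimulator):
    # Any string works as a group_id; the "Simulate-"/"Maestro-*" prefixes seen in the diff are conventions.
    group_id = "MyExperiment-" + uuid4().hex

    agent_logs = await simulator.run(num_runs=3, group_id=group_id)

    # Replaying the recorded tapes under the same group keeps the reruns associated with the originals.
    tapes = [log.simulation_tape for log in agent_logs]
    return await simulator.rerun(tapes, group_id=group_id)
```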
relai/utils.py
CHANGED
@@ -34,20 +34,57 @@ def create_logging_span(logger_id: str | None = None):
 
 
 def log_model(*args, **kwargs):
+    """
+    Logs a model call event.
+
+    Args:
+        name (str): Name of the model.
+        input (Any): Input to the model.
+        output (Any): Output from the model.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_model(*args, **kwargs)
 
 
 def log_tool(*args, **kwargs):
+    """
+    Logs a tool call event.
+
+    Args:
+        name (str): Name of the tool.
+        input (Any): Input to the tool.
+        output (Any): Output from the tool.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_tool(*args, **kwargs)
 
 
 def log_persona(*args, **kwargs):
+    """
+    Logs a persona activity.
+
+    Args:
+        name (str): Name of the persona.
+        model_name (str): Name of the model.
+        input (Any): Input to the persona.
+        output (Any): Output from the persona.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_persona(*args, **kwargs)
 
 
 def log_router(*args, **kwargs):
+    """
+    Logs a router event.
+
+    Args:
+        name (str): Name of the router.
+        input (Any): Input to the router.
+        output (Any): Output from the router.
+        note (Optional[str]): Optional annotation.
+    """
     logger = get_current_logger()
     logger.log_router(*args, **kwargs)
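The logging helpers in `relai.utils` gained docstrings describing their expected arguments. A short sketch of calling `log_tool` with those documented keywords; it assumes an active logging span (for example, inside an `agent_fn` executed by a Simulator), since the helpers resolve the current logger at call time:

```python
from relai.utils import log_tool


def lookup_price(ticker: str) -> float:
    """Illustrative tool body; intended to be called from inside a simulated agent run."""
    price = 123.45  # placeholder for real tool work
    # Keyword names follow the new docstring: name, input, output, note.
    log_tool(name="lookup_price", input={"ticker": ticker}, output=price, note="example call")
    return price
```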
{relai-0.3.2.dist-info → relai-0.3.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: relai
-Version: 0.3.
+Version: 0.3.4
 Summary: An SDK for building reliable AI agents
 Author-email: RELAI <priyatham@relai.ai>, RELAI <wwx@relai.ai>
 License: Apache License
@@ -205,12 +205,12 @@ License: Apache License
 See the License for the specific language governing permissions and
 limitations under the License.
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier:
+Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: pydantic>=2.11.5
@@ -226,14 +226,22 @@ Dynamic: license-file
   <img align="center" src="docs/assets/relai-logo.png" width="460px" />
 </p>
 <p align="left">
-<h1 align="center">
+<h1 align="center">Simulate → Evaluate → Optimize AI Agents</h1>
+<p align="center">
+  <a href="https://pypi.org/project/relai/"><img alt="PyPI" src="https://img.shields.io/pypi/v/relai.svg"></a>
+  <img alt="Python" src="https://img.shields.io/pypi/pyversions/relai.svg">
+  <a href="LICENSE.md"><img alt="License" src="https://img.shields.io/badge/license-Apache--2.0-blue.svg"></a>
+  <a href="http://docs.relai.ai"><img alt="Docs" src="https://img.shields.io/badge/docs-online-brightgreen.svg"></a>
+  <a href="https://github.com/relai-ai/relai-sdk/actions/workflows/upload-to-package-index.yml"><img alt="CI" src="https://img.shields.io/github/actions/workflow/status/relai-ai/relai-sdk/upload-to-package-index.yml?branch=main"></a>
+</p>
+
 
 **RELAI** is an SDK for building **reliable AI agents**. It streamlines the hardest parts of agent development—**simulation**, **evaluation**, and **optimization**—so you can iterate quickly with confidence.
 
 **What you get**
-- **Agent Simulation** — Create full/partial environments, define
-- **Agent Evaluation** — Mix
-- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve prompts/configs
+- **Agent Simulation** — Create full/partial environments, define LLM personas, mock MCP servers & tools, and generate synthetic data. Optionally condition simulation on real samples to better match production.
+- **Agent Evaluation** — Mix code-based and LLM-based custom evaluators or use RELAI platform evaluators. Turn human reviews into benchmarks you can re-run.
+- **Agent Optimization (Maestro)** — Holistic optimizer that uses evaluator signals & feedback to improve **prompts/configs** and suggest **graph-level** changes. Maestro selects best model/tool/graph based on observed performance.
 
 ## Quickstart
 
@@ -249,10 +257,161 @@ uv add relai
 export RELAI_API_KEY="<RELAI_API_KEY>"
 ```
 
+### Example: A simple Stock Assistant Agent (Simulate → Evaluate → Optimize)
+Prerequisites: Needs an OpenAI API key and `openai-agents` installed to run the base agent.
+To use Maestro graph optimizer, save the following in a file called `stock-assistant.py` (or change the `code_paths` argument to `maestro.optimize_structure`).
+```python
+# ============================================================================
+# STEP 0 — Prerequisites
+# ============================================================================
+# export OPENAI_API_KEY="sk-..."
+# `uv add openai-agents`
+# export RELAI_API_KEY="relai-..."
+# Save as `stock-assistant.py`
+
+import asyncio
+
+from agents import Agent, Runner
+
+from relai import (
+    AgentOutputs,
+    AsyncRELAI,
+    AsyncSimulator,
+    SimulationTape,
+    random_env_generator,
+)
+from relai.critico import Critico
+from relai.critico.evaluate import RELAIFormatEvaluator
+from relai.maestro import Maestro, params, register_param
+from relai.mocker import Persona
+from relai.simulator import simulated
+
+# ============================================================================
+# STEP 1.1 — Decorate inputs/tools that will be simulated
+# ============================================================================
+
+
+@simulated
+async def get_user_query() -> str:
+    """Get user's query about stock prices."""
+    # In a real agent, this function might get input from a chat interface.
+    return input("Enter you stock query: ")
+
+
+# ============================================================================
+# STEP 1.2 — Register parameters for optimization
+# ============================================================================
+
+register_param(
+    "prompt",
+    type="prompt",
+    init_value="You are a helpful assistant for stock price questions.",
+    desc="system prompt for the agent",
+)
+
+# ============================================================================
+# STEP 2 — Your agent core
+# ============================================================================
+
+
+async def agent_fn(tape: SimulationTape) -> AgentOutputs:
+    # It is good practice to catch exceptions in agent function
+    # especially if the agent might raise errors with different configs
+    try:
+        question = await get_user_query()
+        agent = Agent(
+            name="Stock assistant",
+            instructions=params.prompt,  # access registered parameter
+            model="gpt-5-mini",
+        )
+        result = await Runner.run(agent, question)
+        tape.extras["format_rubrics"] = {"Prices must include cents (eg: $XXX.XX)": 1.0}
+        tape.agent_inputs["question"] = question  # trace inputs for later auditing
+        return {"summary": result.final_output}
+    except Exception as e:
+        return {"summary": str(e)}
+
+
+
+async def main() -> None:
+    # Set up your simulation environment
+    # Bind Personas/MockTools to fully-qualified function names
+    env_generator = random_env_generator(
+        config_set={
+            "__main__.get_user_query": [Persona(user_persona="A polite and curious user.")],
+        }
+    )
+
+    async with AsyncRELAI() as client:
+        # ============================================================================
+        # STEP 3 — Simulate
+        # ============================================================================
+        simulator = AsyncSimulator(agent_fn=agent_fn, env_generator=env_generator, client=client)
+        agent_logs = await simulator.run(num_runs=1)
+
+        # ============================================================================
+        # STEP 4 — Evaluate with Critico
+        # ============================================================================
+        critico = Critico(client=client)
+        format_evaluator = RELAIFormatEvaluator(client=client)
+        critico.add_evaluators({format_evaluator: 1.0})
+        critico_logs = await critico.evaluate(agent_logs)
+
+        # Publish evaluation report to the RELAI platform
+        await critico.report(critico_logs)
+
+        maestro = Maestro(client=client, agent_fn=agent_fn, log_to_platform=True, name="Stock assistant")
+        maestro.add_setup(simulator=simulator, critico=critico)
+
+        # ============================================================================
+        # STEP 5.1 — Optimize configs with Maestro (the parameters registered earlier in STEP 2)
+        # ============================================================================
+
+        # params.load("saved_config.json")  # load previous params if available
+        await maestro.optimize_config(
+            total_rollouts=20,  # Total number of rollouts to use for optimization.
+            batch_size=2,  # Base batch size to use for individual optimization steps. Defaults to 4.
+            explore_radius=1,  # A positive integer controlling the aggressiveness of exploration during optimization.
+            explore_factor=0.5,  # A float between 0 to 1 controlling the exploration-exploitation trade-off.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+        params.save("saved_config.json")  # save optimized params for future usage
+
+        # ============================================================================
+        # STEP 5.2 — Optimize agent structure with Maestro (changes that cannot be achieved by setting parameters alone)
+        # ============================================================================
+
+        await maestro.optimize_structure(
+            total_rollouts=10,  # Total number of rollouts to use for optimization.
+            code_paths=["stock-assistant.py"],  # A list of paths corresponding to code implementations of the agent.
+            verbose=False,  # If True, additional information will be printed during the optimization step.
+        )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
+```
+## Simulation
+Create controlled environments where agents interact and generate traces. Compose LLM personas, mock MCP tools/servers, and synthetic data; optionally condition on real events to align simulation ⇄ production.
+
+➡️ Learn more: [Simulator](https://docs.relai.ai/simulator.html)
+
+## Evaluation (Critico)
+Use code-based or LLM-based evaluators—or RELAI platform evaluators—and convert human reviews into benchmarks you can re-run in Simuation/CI pipeline.
+
+➡️ Learn more: [Evaluator](https://docs.relai.ai/evaluator.html)
+
+## Optimization (Maestro)
+Maestro is a holistic agent optimizer. It consumes evaluator/user feedback to improve prompts, configs, and even graph structure when prompt tuning isn't enough. It can also select the best model, best tool, and best graph based on observed performance.
+
+➡️ Learn more: [Maestro](https://docs.relai.ai/maestro.html)
+
 ## Links
 
-- 📘 **Documentation:** [docs.relai.ai](
+- 📘 **Documentation:** [docs.relai.ai](http://docs.relai.ai)
 - 🧪 **Examples:** [relai-sdk/examples](examples)
+- 📖 **Tutorials:** [docs.relai.ai/tutorials/index.html](https://docs.relai.ai/tutorials/index.html)
 - 🌐 **Website:** [relai.ai](https://relai.ai)
 - 📰 **Maestro Technical Report:** [ArXiV](https://arxiv.org/abs/2509.04642)
 - 🌐 **Join the Community:** [Discord](https://discord.gg/sjaHJ34YYE)
@@ -260,3 +419,30 @@ export RELAI_API_KEY="<RELAI_API_KEY>"
 ## License
 
 Apache 2.0
+
+## Citation
+If you use the SDK in your research, please consider citing our work:
+
+```
+@misc{relai_sdk,
+  author = {RELAI, Inc.,},
+  title = {relai-sdk},
+  year = {2025},
+  howpublished = {\url{https://github.com/relai-ai/relai-sdk}},
+  note = {GitHub repository},
+  urldate = {2025-10-20}
+}
+
+@misc{wang2025maestrojointgraph,
+  title={Maestro: Joint Graph & Config Optimization for Reliable AI Agents},
+  author={Wenxiao Wang and Priyatham Kattakinda and Soheil Feizi},
+  year={2025},
+  eprint={2509.04642},
+  archivePrefix={arXiv},
+  primaryClass={cs.AI},
+  url={https://arxiv.org/abs/2509.04642},
+}
+```
+
+<p align="center"> <sub>Made with ❤️ by the RELAI team — <a href="https://relai.ai">relai.ai</a> • <a href="https://discord.gg/sjaHJ34YYE">Community</a></sub> </p>
+
{relai-0.3.2.dist-info → relai-0.3.4.dist-info}/RECORD
CHANGED

@@ -5,24 +5,24 @@ relai/benchmark.py,sha256=YTd2xu9aKlUcaWdHInV_7U5YroivYMgTk7UE1XMZBN4,15766
 relai/data.py,sha256=ne0H4EQ0B_yxE9fogoovGExuJuwqutSpuhNsl4UmcsU,7852
 relai/exporter.py,sha256=jZxrUjlYCOpRr7gdmbg6-LUL_fXmtMgPp89CgvP5Z7A,1932
 relai/flags.py,sha256=_GrjQg7mZq7BwEIedR6cjWY4grwsryqbKdgyiRr2P7k,1929
-relai/logger.py,sha256=
-relai/simulator.py,sha256=
-relai/utils.py,sha256=
+relai/logger.py,sha256=j6PdzNkltukWAqBGKAB2qH2p61kS60RwsupDz-gELB4,18358
+relai/simulator.py,sha256=oEC5oLODPo1vLGBaMUdDj0JovZlc595dez931ihDuXk,16465
+relai/utils.py,sha256=va3xz79NTLJiZKaBrS_3Y8dC4M_JEmf8uOwzwFYYqUU,2359
 relai/critico/__init__.py,sha256=c_mDXCVEzsQckDS4ZFOmANo8vB5Vjr1bvyQNimAPVR8,52
 relai/critico/critico.py,sha256=J1ek9v2J5WBnHnZknZEVppIrWGczVHxuRX7ghK6mpXM,7616
 relai/critico/evaluate.py,sha256=Bd-Hlsh2fz2AQ0SINoyqcdpdbWK2t8yrAPHv6UCueFY,31348
 relai/maestro/__init__.py,sha256=NVXy0v7yghGwGbtsPti4gQGtVA3vMgXdpIpiJUesqME,186
 relai/maestro/graph.py,sha256=SyY0rHzes3o5bSqlK66CQDUAeyChUhWJQM3FzJCBvfs,1850
-relai/maestro/optimizer.py,sha256=
+relai/maestro/optimizer.py,sha256=96rFxXN5bNDCSgOOPywbWk5AbbnJ6ncLK_Z2uh66sdU,29413
 relai/maestro/params.py,sha256=-0Dtk23ClHJR6Q-PsaKr-GwUylz0-BIIquJF2eA-p-I,8925
 relai/maestro/utils.py,sha256=WIE3cR8EMDVfAJozEfngh8DfOQdRPZMxxtN-M1cMmxo,7276
 relai/mocker/__init__.py,sha256=JP2xlSG6Szc0tSEiZzCN6UXdE66uy7AmRn-p358xFVM,102
 relai/mocker/base_mocker.py,sha256=BL4WYtdxWHZdKICfo9idW5i5MrkoxJDElcoeGk-jaJM,994
-relai/mocker/persona.py,sha256=
-relai/mocker/tool.py,sha256=
+relai/mocker/persona.py,sha256=q2A_lwYrp7H6sKkguMIPl7FQ_6pL4kTaxGBJ1kU2aGA,6678
+relai/mocker/tool.py,sha256=dHXkVcD9D6HMNlBj13V7GTgW_99a_-3tf9rC6iLDFn8,2229
 relai/schema/visual.py,sha256=Y6BP5CHxLU0e7sTfNjgKmG2GD0R9a8rvITusxd-d-UE,2443
-relai-0.3.
-relai-0.3.
-relai-0.3.
-relai-0.3.
-relai-0.3.
+relai-0.3.4.dist-info/licenses/LICENSE.md,sha256=UNo7WT0mbmbUFjRGzRGaBtybmBPB7xd2ls9tfCkv0oc,10979
+relai-0.3.4.dist-info/METADATA,sha256=VYH3VpOpZUXP7K-EQzu_cfhUrLDBOk5bICMTDs72Z54,23531
+relai-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+relai-0.3.4.dist-info/top_level.txt,sha256=pRyA93fRj-HsukRNHyS4sHdvLO4TY8VvBMK44KcxRA4,6
+relai-0.3.4.dist-info/RECORD,,
{relai-0.3.2.dist-info → relai-0.3.4.dist-info}/WHEEL
File without changes
{relai-0.3.2.dist-info → relai-0.3.4.dist-info}/licenses/LICENSE.md
File without changes
{relai-0.3.2.dist-info → relai-0.3.4.dist-info}/top_level.txt
File without changes