fluxloop_cli-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fluxloop-cli might be problematic.
- fluxloop_cli/__init__.py +9 -0
- fluxloop_cli/arg_binder.py +219 -0
- fluxloop_cli/commands/__init__.py +5 -0
- fluxloop_cli/commands/config.py +355 -0
- fluxloop_cli/commands/generate.py +304 -0
- fluxloop_cli/commands/init.py +225 -0
- fluxloop_cli/commands/parse.py +293 -0
- fluxloop_cli/commands/run.py +310 -0
- fluxloop_cli/commands/status.py +227 -0
- fluxloop_cli/config_loader.py +159 -0
- fluxloop_cli/constants.py +12 -0
- fluxloop_cli/input_generator.py +158 -0
- fluxloop_cli/llm_generator.py +417 -0
- fluxloop_cli/main.py +97 -0
- fluxloop_cli/project_paths.py +80 -0
- fluxloop_cli/runner.py +634 -0
- fluxloop_cli/target_loader.py +95 -0
- fluxloop_cli/templates.py +277 -0
- fluxloop_cli/validators.py +31 -0
- fluxloop_cli-0.1.0.dist-info/METADATA +86 -0
- fluxloop_cli-0.1.0.dist-info/RECORD +24 -0
- fluxloop_cli-0.1.0.dist-info/WHEEL +5 -0
- fluxloop_cli-0.1.0.dist-info/entry_points.txt +2 -0
- fluxloop_cli-0.1.0.dist-info/top_level.txt +1 -0
fluxloop_cli/runner.py
ADDED
@@ -0,0 +1,634 @@
"""
Runner modules for executing experiments and agents.
"""

from __future__ import annotations

import asyncio
import importlib  # used by SingleRunner.run to import the agent module
import json
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence

import fluxloop
import yaml

from fluxloop.buffer import EventBuffer
from fluxloop.schemas import ExperimentConfig, PersonaConfig
from rich.console import Console

from .target_loader import TargetLoader
from .arg_binder import ArgBinder

console = Console()


class ExperimentRunner:
    """Runner for full experiments with multiple iterations."""

    def __init__(self, config: ExperimentConfig, no_collector: bool = False):
        """
        Initialize the experiment runner.

        Args:
            config: Experiment configuration
            no_collector: If True, disable sending to collector
        """
        self.config = config
        self.no_collector = no_collector

        # Configure output directories (respect config location for relative paths)
        output_base = Path(config.output_directory)
        if not output_base.is_absolute():
            source_dir = config.get_source_dir()
            if source_dir:
                output_base = (source_dir / output_base).resolve()
            else:
                output_base = (Path.cwd() / output_base).resolve()

        output_base.mkdir(parents=True, exist_ok=True)

        offline_dir = output_base / "artifacts"
        fluxloop.configure(
            use_collector=not no_collector and bool(config.collector_url),
            collector_url=config.collector_url or None,
            api_key=config.collector_api_key,
            offline_store_enabled=True,
            offline_store_dir=str(offline_dir),
        )
        self.offline_dir = offline_dir

        # Create output directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.output_dir = output_base / f"{config.name}_{timestamp}"
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Results storage
        self.results = {
            "total_runs": 0,
            "successful": 0,
            "failed": 0,
            "traces": [],
            "errors": [],
            "durations": [],
        }

        # Helpers for target loading and argument binding
        self._arg_binder = ArgBinder(config)

    def _load_agent(self) -> Callable:
        """Load the agent function from module path."""
        loader = TargetLoader(self.config.runner, source_dir=self.config.get_source_dir())
        try:
            return loader.load()
        except ValueError as exc:
            raise RuntimeError(str(exc)) from exc

    async def run_experiment(self, progress_callback: Optional[Callable] = None) -> Dict[str, Any]:
        """
        Run the complete experiment.

        Args:
            progress_callback: Optional callback for progress updates

        Returns:
            Experiment results summary
        """
        start_time = time.time()

        # Load agent module
        agent_func = self._load_agent()

        inputs = await self._load_inputs()

        persona_map = {persona.name: persona for persona in (self.config.personas or [])}
        use_entry_persona = self.config.has_external_inputs()

        delay = getattr(self.config, "run_delay_seconds", 0) or 0

        # Run iterations
        for iteration in range(self.config.iterations):
            if use_entry_persona:
                for entry in inputs:
                    persona = self._resolve_entry_persona(entry, persona_map)
                    await self._run_single(
                        agent_func,
                        entry,
                        persona,
                        iteration,
                    )

                    if progress_callback:
                        progress_callback()

                    if delay > 0:
                        await asyncio.sleep(delay)
            else:
                personas = self.config.personas or [None]
                for persona in personas:
                    for entry in inputs:
                        await self._run_single(
                            agent_func,
                            entry,
                            persona,
                            iteration,
                        )

                        if progress_callback:
                            progress_callback()

                        if delay > 0:
                            await asyncio.sleep(delay)

        if use_entry_persona:
            self.config.set_resolved_persona_count(1)
        else:
            persona_multiplier = len(self.config.personas) if self.config.personas else 1
            self.config.set_resolved_persona_count(persona_multiplier)

        # Calculate summary statistics
        end_time = time.time()
        self.results["duration_seconds"] = end_time - start_time
        self.results["success_rate"] = (
            self.results["successful"] / self.results["total_runs"]
            if self.results["total_runs"] > 0
            else 0
        )

        if self.results["durations"]:
            self.results["avg_duration_ms"] = sum(self.results["durations"]) / len(self.results["durations"])
        else:
            self.results["avg_duration_ms"] = 0

        # Save results
        self._save_results()

        return {
            "total_runs": self.results["total_runs"],
            "successful": self.results["successful"],
            "failed": self.results["failed"],
            "success_rate": self.results["success_rate"],
            "avg_duration_ms": self.results["avg_duration_ms"],
            "output_dir": str(self.output_dir),
        }

    async def _load_inputs(self) -> List[Dict[str, Any]]:
        """Load input entries from configuration or external files."""
        if not self.config.inputs_file:
            raise ValueError(
                "inputs_file is not configured. Generate inputs with "
                "`fluxloop generate inputs --project <name>` and set the generated file "
                "in setting.yaml before running experiments."
            )

        inputs = self._load_external_inputs()
        self.config.set_resolved_input_count(len(inputs))
        if self.config.has_external_inputs():
            self.config.set_resolved_persona_count(1)
        else:
            persona_multiplier = len(self.config.personas) if self.config.personas else 1
            self.config.set_resolved_persona_count(persona_multiplier)
        return inputs

    def _load_external_inputs(self) -> List[Dict[str, Any]]:
        """Load variations from an external file."""
        source_dir = self.config.get_source_dir()
        raw_path = Path(self.config.inputs_file)  # type: ignore[arg-type]
        inputs_path = (source_dir / raw_path if source_dir and not raw_path.is_absolute() else raw_path).resolve()
        if not inputs_path.exists():
            raise FileNotFoundError(f"Inputs file not found: {inputs_path}")

        with open(inputs_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not data:
            raise ValueError(f"Inputs file is empty: {inputs_path}")

        # Support either top-level list or dict with "inputs"
        entries: List[Dict[str, Any]]
        variations: List[Dict[str, Any]] = []
        if isinstance(data, dict) and "inputs" in data:
            entries = data["inputs"]
        elif isinstance(data, list):
            entries = data
        else:
            raise ValueError(
                "Inputs file must be a list of inputs or a mapping containing an 'inputs' list"
            )

        for index, item in enumerate(entries):
            if not isinstance(item, dict):
                raise ValueError(
                    f"Input entry at index {index} must be a mapping, got {type(item).__name__}"
                )

            input_value = item.get("input")
            if not input_value:
                raise ValueError(f"Input entry at index {index} is missing required 'input' field")

            variations.append({
                "input": input_value,
                "metadata": item.get("metadata", item),
                "source": "external_file",
                "source_index": index,
            })

        if not variations:
            raise ValueError(f"Inputs file {inputs_path} did not contain any inputs")

        return variations

    async def _run_single(
        self,
        agent_func: Callable,
        variation: Dict[str, Any],
        persona: Optional[PersonaConfig],
        iteration: int,
    ) -> None:
        """Run a single execution."""
        self.results["total_runs"] += 1

        # Create trace name
        trace_name = f"{self.config.name}_iter{iteration}"
        if persona:
            trace_name += f"_persona_{persona.name}"

        # Prepare input
        input_text = variation["input"]
        if persona and self.config.input_template:
            # Apply persona to input template
            input_text = self.config.input_template.format(
                input=input_text,
                persona=persona.to_prompt(),
            )

        # Run with instrumentation
        start_time = time.time()

        try:
            callback_messages: Dict[str, Any] = {}
            trace_id: Optional[str] = None
            result: Any

            with fluxloop.instrument(trace_name) as ctx:
                if hasattr(ctx, "trace") and getattr(ctx, "trace") is not None:
                    trace_id = str(ctx.trace.id)
                    ctx.add_metadata("trace_id", trace_id)

                # Add metadata
                ctx.add_metadata("iteration", iteration)
                ctx.add_metadata("variation", variation)
                if persona:
                    ctx.add_metadata("persona", persona.name)

                # Run agent
                result = await self._call_agent(
                    agent_func,
                    input_text,
                    iteration=iteration,
                    callback_store=callback_messages,
                )

                # Allow background callbacks to flush
                await self._wait_for_callbacks(callback_messages)

                send_messages = callback_messages.get("send", [])
                error_messages = callback_messages.get("error", [])

                if send_messages or error_messages:
                    ctx.add_metadata(
                        "callback_messages",
                        {
                            "send": send_messages,
                            "error": error_messages,
                        },
                    )

            # Force flush buffered events so observations are persisted
            EventBuffer.get_instance().flush()

            observations: List[Dict[str, Any]] = []
            if trace_id:
                observations = self._load_observations_for_trace(trace_id)

            final_output = self._extract_final_output(callback_messages, observations)
            if final_output is not None:
                result = final_output

            # Mark successful
            self.results["successful"] += 1

            # Record duration
            duration_ms = (time.time() - start_time) * 1000
            self.results["durations"].append(duration_ms)

            trace_entry = {
                "trace_id": trace_id,
                "iteration": iteration,
                "persona": persona.name if persona else None,
                "input": input_text,
                "output": result,
                "duration_ms": duration_ms,
                "success": True,
            }

            send_messages = callback_messages.get("send", [])
            error_messages = callback_messages.get("error", [])

            if send_messages or error_messages:
                trace_entry["callback_messages"] = {
                    "send": [self._serialize_callback(args, kwargs) for args, kwargs in send_messages],
                    "error": [self._serialize_callback(args, kwargs) for args, kwargs in error_messages],
                }

            if observations:
                trace_entry["observation_count"] = len(observations)

            self.results["traces"].append(trace_entry)

        except Exception as e:
            # Record failure
            self.results["failed"] += 1
            self.results["errors"].append({
                "iteration": iteration,
                "persona": persona.name if persona else None,
                "input": input_text,
                "error": str(e),
            })

    def _resolve_entry_persona(
        self,
        entry: Dict[str, Any],
        persona_map: Dict[str, PersonaConfig],
    ) -> Optional[PersonaConfig]:
        """Select persona metadata from an input entry when available."""

        metadata = entry.get("metadata") or {}
        persona_name = metadata.get("persona")

        if persona_name and persona_name in persona_map:
            return persona_map[persona_name]

        return None

    async def _call_agent(
        self,
        agent_func: Callable,
        input_text: str,
        iteration: int = 0,
        callback_store: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Call the agent with arguments bound by ArgBinder (sync or async)."""

        kwargs = self._arg_binder.bind_call_args(
            agent_func,
            runtime_input=input_text,
            iteration=iteration,
        )

        # Attach collector callback capture if present
        if callback_store is not None:
            send_cb = kwargs.get("send_message_callback")
            if callable(send_cb) and hasattr(send_cb, "messages"):
                callback_store["send"] = send_cb.messages

            error_cb = kwargs.get("send_error_callback")
            if callable(error_cb) and hasattr(error_cb, "errors"):
                callback_store["error"] = error_cb.errors

        if asyncio.iscoroutinefunction(agent_func):
            return await agent_func(**kwargs)

        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(None, lambda: agent_func(**kwargs))

    async def _wait_for_callbacks(
        self,
        callback_messages: Dict[str, Any],
        *,
        timeout_seconds: float = 5.0,
        poll_interval: float = 0.1,
    ) -> None:
        """Wait briefly for background callbacks to populate the capture lists."""
        if not callback_messages:
            return

        deadline = time.time() + timeout_seconds
        while time.time() < deadline:
            if callback_messages.get("send") or callback_messages.get("error"):
                break
            await asyncio.sleep(poll_interval)

    def _load_observations_for_trace(self, trace_id: str) -> List[Dict[str, Any]]:
        """Load observations from the offline store that match the given trace_id."""
        observations_path = self.offline_dir / "observations.jsonl"
        if not observations_path.exists():
            return []

        matched: List[Dict[str, Any]] = []
        try:
            with observations_path.open("r", encoding="utf-8") as src:
                for line in src:
                    if not line.strip():
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if data.get("trace_id") == trace_id:
                        matched.append(data)
        except OSError:
            return []

        return matched

    def _extract_final_output(
        self,
        callback_messages: Dict[str, Any],
        observations: List[Dict[str, Any]],
    ) -> Any:
        """Derive the final output from callbacks or observations."""
        for observation in reversed(observations):
            if observation.get("name") == "agent_final_response" and observation.get("output") is not None:
                return observation.get("output")

        for observation in reversed(observations):
            if observation.get("type") == "agent" and observation.get("output") is not None:
                return observation.get("output")

        send_messages = callback_messages.get("send") if callback_messages else None
        if send_messages:
            last_args, last_kwargs = send_messages[-1]
            return self._extract_payload(last_args, last_kwargs)

        return None

    @staticmethod
    def _extract_payload(args: Sequence[Any], kwargs: Dict[str, Any]) -> Any:
        if kwargs:
            return kwargs

        if not args:
            return None

        if len(args) == 1:
            return args[0]

        return list(args)

    @staticmethod
    def _serialize_callback(args: Sequence[Any], kwargs: Dict[str, Any]) -> Any:
        """Serialize callback arguments for JSON storage."""
        if len(args) == 1 and not kwargs:
            return args[0]
        return {
            "args": list(args),
            "kwargs": kwargs,
        }

    def _save_results(self) -> None:
        """Save results to output directory."""
        # Save summary
        summary_file = self.output_dir / "summary.json"
        summary = {
            "name": self.config.name,
            "date": datetime.now().isoformat(),
            "config": self.config.to_dict(),
            "results": {
                "total_runs": self.results["total_runs"],
                "successful": self.results["successful"],
                "failed": self.results["failed"],
                "success_rate": self.results["success_rate"],
                "avg_duration_ms": self.results["avg_duration_ms"],
                "duration_seconds": self.results["duration_seconds"],
            },
        }
        summary_file.write_text(json.dumps(summary, indent=2))

        if self.config.save_traces:
            self._save_trace_summary()
            self._save_experiment_observations()

        # Save errors
        if self.results["errors"]:
            errors_file = self.output_dir / "errors.json"
            errors_file.write_text(json.dumps(self.results["errors"], indent=2))

    def _save_trace_summary(self) -> None:
        """Persist detailed and summary trace information for the experiment."""
        full_traces_path = self.output_dir / "traces.jsonl"
        summary_path = self.output_dir / "trace_summary.jsonl"

        with full_traces_path.open("w", encoding="utf-8") as full_file:
            for trace in self.results["traces"]:
                full_file.write(json.dumps(trace) + "\n")

        with summary_path.open("w", encoding="utf-8") as summary_file:
            for trace in self.results["traces"]:
                summary_file.write(
                    json.dumps(
                        {
                            "trace_id": trace.get("trace_id"),
                            "iteration": trace.get("iteration"),
                            "persona": trace.get("persona"),
                            "input": trace.get("input"),
                            "output": trace.get("output"),
                            "duration_ms": trace.get("duration_ms"),
                            "success": trace.get("success"),
                        }
                    )
                    + "\n"
                )

    def _save_experiment_observations(self) -> None:
        """Copy matching observations from the offline store into the experiment directory."""
        trace_ids = {
            trace["trace_id"]
            for trace in self.results["traces"]
            if trace.get("trace_id")
        }

        if not trace_ids:
            return

        source_path = self.offline_dir / "observations.jsonl"
        if not source_path.exists():
            console.print(
                f"[yellow]⚠️ Observations file not found: {source_path}[/yellow]"
            )
            return

        destination = self.output_dir / "observations.jsonl"
        copied = 0

        with source_path.open("r", encoding="utf-8") as src, destination.open(
            "w", encoding="utf-8"
        ) as dst:
            for line in src:
                if not line.strip():
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if record.get("trace_id") in trace_ids:
                    dst.write(json.dumps(record) + "\n")
                    copied += 1

        console.print(
            f"[green]✅ Saved {copied} observations to {destination.name}[/green]"
        )


class SingleRunner:
    """Runner for single agent executions."""

    def __init__(
        self,
        module_path: str,
        function_name: str = "run",
        trace_name: Optional[str] = None,
        no_collector: bool = False,
    ):
        """
        Initialize single runner.

        Args:
            module_path: Module path to agent
            function_name: Function to call
            trace_name: Name for the trace
            no_collector: If True, disable collector
        """
        self.module_path = module_path
        self.function_name = function_name
        self.trace_name = trace_name or f"single_{module_path}"

        if no_collector:
            fluxloop.configure(enabled=False)

    async def run(self, input_text: str) -> Any:
        """
        Run the agent once.

        Args:
            input_text: Input for the agent

        Returns:
            Agent output
        """
        # Load agent
        try:
            module = importlib.import_module(self.module_path)
            agent_func = getattr(module, self.function_name)
        except (ImportError, AttributeError) as e:
            raise RuntimeError(f"Failed to load agent: {e}")

        # Run with instrumentation
        with fluxloop.instrument(self.trace_name):
            if asyncio.iscoroutinefunction(agent_func):
                return await agent_func(input_text)
            else:
                # Run sync function in executor
                loop = asyncio.get_event_loop()
                return await loop.run_in_executor(None, agent_func, input_text)