fluxloop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fluxloop-cli might be problematic. Click here for more details.

fluxloop_cli/runner.py ADDED
@@ -0,0 +1,634 @@
1
+ """
2
+ Runner modules for executing experiments and agents.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import json
9
+ import time
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any, Callable, Dict, List, Optional, Sequence
13
+
14
+ import fluxloop
15
+ import yaml
16
+
17
+ from fluxloop.buffer import EventBuffer
18
+ from fluxloop.schemas import ExperimentConfig, PersonaConfig
19
+ from rich.console import Console
20
+
21
+ from .target_loader import TargetLoader
22
+ from .arg_binder import ArgBinder
23
+
24
+ console = Console()
25
+
26
+
27
class ExperimentRunner:
    """Runner for full experiments with multiple iterations.

    An experiment executes the configured agent once per
    (iteration, persona, input) combination, records per-run traces,
    durations and errors in ``self.results`` and finally writes a summary
    (plus optional trace/observation files) into a timestamped output
    directory.
    """

    def __init__(self, config: "ExperimentConfig", no_collector: bool = False):
        """
        Initialize the experiment runner.

        Creates the output directories on disk and configures the
        ``fluxloop`` SDK so that an offline store lives under
        ``<output_base>/artifacts``.

        Args:
            config: Experiment configuration
            no_collector: If True, disable sending to collector
        """
        self.config = config
        self.no_collector = no_collector

        # Relative output paths are anchored at the config's source directory
        # when one is known, otherwise at the current working directory.
        output_base = Path(config.output_directory)
        if not output_base.is_absolute():
            source_dir = config.get_source_dir()
            anchor = source_dir if source_dir else Path.cwd()
            output_base = (anchor / output_base).resolve()

        output_base.mkdir(parents=True, exist_ok=True)

        offline_dir = output_base / "artifacts"
        fluxloop.configure(
            use_collector=not no_collector and bool(config.collector_url),
            collector_url=config.collector_url or None,
            api_key=config.collector_api_key,
            offline_store_enabled=True,
            offline_store_dir=str(offline_dir),
        )
        self.offline_dir = offline_dir

        # Per-experiment output directory: "<name>_<YYYYmmdd_HHMMSS>"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.output_dir = output_base / f"{config.name}_{timestamp}"
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Results storage
        self.results: Dict[str, Any] = {
            "total_runs": 0,
            "successful": 0,
            "failed": 0,
            "traces": [],
            "errors": [],
            "durations": [],
        }

        # Helper for binding call arguments to the agent function
        self._arg_binder = ArgBinder(config)

    def _load_agent(self) -> Callable:
        """Load the agent callable described by ``config.runner``.

        Raises:
            RuntimeError: If the loader rejects the target with a ``ValueError``.
        """
        loader = TargetLoader(self.config.runner, source_dir=self.config.get_source_dir())
        try:
            return loader.load()
        except ValueError as exc:
            raise RuntimeError(str(exc)) from exc

    def _sync_resolved_persona_count(self) -> None:
        """Store the persona multiplier on the config.

        Inputs coming from an external file count as a single persona slot;
        otherwise the multiplier is the number of configured personas
        (at least 1).
        """
        if self.config.has_external_inputs():
            self.config.set_resolved_persona_count(1)
        else:
            persona_multiplier = len(self.config.personas) if self.config.personas else 1
            self.config.set_resolved_persona_count(persona_multiplier)

    async def _execute_entry(
        self,
        agent_func: Callable,
        entry: Dict[str, Any],
        persona: Optional["PersonaConfig"],
        iteration: int,
        progress_callback: Optional[Callable],
        delay: float,
    ) -> None:
        """Run one entry, then report progress and honour the inter-run delay."""
        await self._run_single(agent_func, entry, persona, iteration)

        if progress_callback:
            progress_callback()

        if delay > 0:
            await asyncio.sleep(delay)

    async def run_experiment(self, progress_callback: Optional[Callable] = None) -> Dict[str, Any]:
        """
        Run the complete experiment.

        Args:
            progress_callback: Optional callback for progress updates; it is
                invoked without arguments after every single run.

        Returns:
            Experiment results summary with the keys ``total_runs``,
            ``successful``, ``failed``, ``success_rate``, ``avg_duration_ms``
            and ``output_dir``.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments during a long experiment.
        started = time.perf_counter()

        agent_func = self._load_agent()
        inputs = await self._load_inputs()

        persona_map = {persona.name: persona for persona in (self.config.personas or [])}
        use_entry_persona = self.config.has_external_inputs()

        delay = getattr(self.config, "run_delay_seconds", 0) or 0

        for iteration in range(self.config.iterations):
            if use_entry_persona:
                # Each entry may name its own persona in its metadata.
                for entry in inputs:
                    persona = self._resolve_entry_persona(entry, persona_map)
                    await self._execute_entry(
                        agent_func, entry, persona, iteration, progress_callback, delay
                    )
            else:
                # Cross product of configured personas and inputs.
                for persona in self.config.personas or [None]:
                    for entry in inputs:
                        await self._execute_entry(
                            agent_func, entry, persona, iteration, progress_callback, delay
                        )

        self._sync_resolved_persona_count()

        # Calculate summary statistics
        self.results["duration_seconds"] = time.perf_counter() - started
        total_runs = self.results["total_runs"]
        self.results["success_rate"] = (
            self.results["successful"] / total_runs if total_runs > 0 else 0
        )

        durations = self.results["durations"]
        self.results["avg_duration_ms"] = sum(durations) / len(durations) if durations else 0

        self._save_results()

        return {
            "total_runs": self.results["total_runs"],
            "successful": self.results["successful"],
            "failed": self.results["failed"],
            "success_rate": self.results["success_rate"],
            "avg_duration_ms": self.results["avg_duration_ms"],
            "output_dir": str(self.output_dir),
        }

    async def _load_inputs(self) -> List[Dict[str, Any]]:
        """Load input entries from the configured inputs file.

        Also records the resolved input and persona counts on the config.

        Raises:
            ValueError: If ``config.inputs_file`` is not set.
        """
        if not self.config.inputs_file:
            raise ValueError(
                "inputs_file is not configured. Generate inputs with "
                "`fluxloop generate inputs --project <name>` and set the generated file "
                "in setting.yaml before running experiments."
            )

        inputs = self._load_external_inputs()
        self.config.set_resolved_input_count(len(inputs))
        self._sync_resolved_persona_count()
        return inputs

    def _load_external_inputs(self) -> List[Dict[str, Any]]:
        """Load variations from an external YAML file.

        The file may be a top-level list of entries or a mapping with an
        ``inputs`` list. Every entry must be a mapping with a non-empty
        ``input`` field.

        Returns:
            One dict per entry with the keys ``input``, ``metadata``
            (the entry's ``metadata`` value, or the entry itself when absent),
            ``source`` and ``source_index``.

        Raises:
            FileNotFoundError: If the inputs file does not exist.
            ValueError: If the file is empty or structurally invalid.
        """
        source_dir = self.config.get_source_dir()
        raw_path = Path(self.config.inputs_file)  # type: ignore[arg-type]
        if source_dir and not raw_path.is_absolute():
            raw_path = source_dir / raw_path
        inputs_path = raw_path.resolve()
        if not inputs_path.exists():
            raise FileNotFoundError(f"Inputs file not found: {inputs_path}")

        with open(inputs_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not data:
            raise ValueError(f"Inputs file is empty: {inputs_path}")

        # Support either top-level list or dict with "inputs"
        if isinstance(data, dict) and "inputs" in data:
            entries = data["inputs"]
        elif isinstance(data, list):
            entries = data
        else:
            entries = None

        # An "inputs" key holding null, a scalar or a mapping would otherwise
        # fail later with an unrelated TypeError or a misleading per-entry error.
        if not isinstance(entries, list):
            raise ValueError(
                "Inputs file must be a list of inputs or a mapping containing an 'inputs' list"
            )

        variations: List[Dict[str, Any]] = []
        for index, item in enumerate(entries):
            if not isinstance(item, dict):
                raise ValueError(
                    f"Input entry at index {index} must be a mapping, got {type(item).__name__}"
                )

            input_value = item.get("input")
            if not input_value:
                raise ValueError(f"Input entry at index {index} is missing required 'input' field")

            variations.append({
                "input": input_value,
                "metadata": item.get("metadata", item),
                "source": "external_file",
                "source_index": index,
            })

        if not variations:
            raise ValueError(f"Inputs file {inputs_path} did not contain any inputs")

        return variations

    async def _run_single(
        self,
        agent_func: Callable,
        variation: Dict[str, Any],
        persona: Optional["PersonaConfig"],
        iteration: int,
    ) -> None:
        """Run a single execution and record its outcome in ``self.results``.

        Any exception raised while the agent runs (or while its observations
        are collected) is recorded as a failed run instead of propagating, so
        one bad input does not abort the experiment.
        """
        self.results["total_runs"] += 1

        # Create trace name
        trace_name = f"{self.config.name}_iter{iteration}"
        if persona:
            trace_name += f"_persona_{persona.name}"

        # Prepare input; the template is only applied when a persona is present
        input_text = variation["input"]
        if persona and self.config.input_template:
            input_text = self.config.input_template.format(
                input=input_text,
                persona=persona.to_prompt(),
            )

        persona_name = persona.name if persona else None
        started = time.perf_counter()

        try:
            callback_messages: Dict[str, Any] = {}
            trace_id: Optional[str] = None
            result: Any

            with fluxloop.instrument(trace_name) as ctx:
                if getattr(ctx, "trace", None) is not None:
                    trace_id = str(ctx.trace.id)
                    ctx.add_metadata("trace_id", trace_id)

                # Add metadata
                ctx.add_metadata("iteration", iteration)
                ctx.add_metadata("variation", variation)
                if persona:
                    ctx.add_metadata("persona", persona.name)

                # Run agent
                result = await self._call_agent(
                    agent_func,
                    input_text,
                    iteration=iteration,
                    callback_store=callback_messages,
                )

                # Allow background callbacks to flush
                await self._wait_for_callbacks(callback_messages)

                send_messages = callback_messages.get("send", [])
                error_messages = callback_messages.get("error", [])

                if send_messages or error_messages:
                    ctx.add_metadata(
                        "callback_messages",
                        {
                            "send": send_messages,
                            "error": error_messages,
                        },
                    )

            # Force flush buffered events so observations are persisted
            EventBuffer.get_instance().flush()

            observations: List[Dict[str, Any]] = (
                self._load_observations_for_trace(trace_id) if trace_id else []
            )

            # Prefer an output derived from observations/callbacks over the
            # raw return value of the agent function.
            final_output = self._extract_final_output(callback_messages, observations)
            if final_output is not None:
                result = final_output

            # Mark successful
            self.results["successful"] += 1

            # Record duration
            duration_ms = (time.perf_counter() - started) * 1000
            self.results["durations"].append(duration_ms)

            trace_entry: Dict[str, Any] = {
                "trace_id": trace_id,
                "iteration": iteration,
                "persona": persona_name,
                "input": input_text,
                "output": result,
                "duration_ms": duration_ms,
                "success": True,
            }

            # Look the lists up again: callbacks that fired after the
            # instrumented block closed should still be captured here.
            send_messages = callback_messages.get("send", [])
            error_messages = callback_messages.get("error", [])

            if send_messages or error_messages:
                trace_entry["callback_messages"] = {
                    "send": [self._serialize_callback(args, kwargs) for args, kwargs in send_messages],
                    "error": [self._serialize_callback(args, kwargs) for args, kwargs in error_messages],
                }

            if observations:
                trace_entry["observation_count"] = len(observations)

            self.results["traces"].append(trace_entry)

        except Exception as e:
            # Deliberate best-effort boundary: record the failure and carry on
            # with the remaining runs.
            self.results["failed"] += 1
            self.results["errors"].append({
                "iteration": iteration,
                "persona": persona_name,
                "input": input_text,
                "error": str(e),
            })

    def _resolve_entry_persona(
        self,
        entry: Dict[str, Any],
        persona_map: Dict[str, "PersonaConfig"],
    ) -> Optional["PersonaConfig"]:
        """Select the persona named in an input entry's metadata, if any.

        Returns ``None`` when the entry has no usable metadata, names no
        persona, or names one that is not in ``persona_map``.
        """
        metadata = entry.get("metadata") or {}
        # Metadata comes from a user-authored file; a scalar or list here
        # must not crash the whole experiment.
        if not isinstance(metadata, dict):
            return None

        persona_name = metadata.get("persona")
        if not persona_name:
            return None

        try:
            return persona_map.get(persona_name)
        except TypeError:
            # Unhashable value (e.g. a list) can never name a persona.
            return None

    async def _call_agent(
        self,
        agent_func: Callable,
        input_text: str,
        iteration: int = 0,
        callback_store: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Call the agent with arguments bound by ArgBinder (sync or async).

        When ``callback_store`` is given, the capture lists exposed by the
        bound ``send_message_callback`` / ``send_error_callback`` arguments
        are registered in it under ``"send"`` / ``"error"``.
        """
        kwargs = self._arg_binder.bind_call_args(
            agent_func,
            runtime_input=input_text,
            iteration=iteration,
        )

        # Attach collector callback capture if present
        if callback_store is not None:
            send_cb = kwargs.get("send_message_callback")
            if callable(send_cb) and hasattr(send_cb, "messages"):
                callback_store["send"] = send_cb.messages

            error_cb = kwargs.get("send_error_callback")
            if callable(error_cb) and hasattr(error_cb, "errors"):
                callback_store["error"] = error_cb.errors

        if asyncio.iscoroutinefunction(agent_func):
            return await agent_func(**kwargs)

        # Sync agents run in the default executor so they do not block the
        # event loop. get_running_loop() is the supported call inside a
        # coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: agent_func(**kwargs))

    async def _wait_for_callbacks(
        self,
        callback_messages: Dict[str, Any],
        *,
        timeout_seconds: float = 5.0,
        poll_interval: float = 0.1,
    ) -> None:
        """Wait briefly for background callbacks to populate the capture lists.

        Returns immediately when no capture list was registered; otherwise
        polls until either list is non-empty or ``timeout_seconds`` elapsed.
        """
        if not callback_messages:
            return

        # Monotonic clock: a wall-clock jump must not shorten or extend the wait.
        deadline = time.monotonic() + timeout_seconds
        while time.monotonic() < deadline:
            if callback_messages.get("send") or callback_messages.get("error"):
                break
            await asyncio.sleep(poll_interval)

    def _load_observations_for_trace(self, trace_id: str) -> List[Dict[str, Any]]:
        """Load observations from the offline store that match the given trace_id.

        Blank lines, lines that are not valid JSON and JSON values that are
        not objects are skipped. A missing or unreadable file yields ``[]``.
        """
        observations_path = self.offline_dir / "observations.jsonl"
        if not observations_path.exists():
            return []

        matched: List[Dict[str, Any]] = []
        try:
            with observations_path.open("r", encoding="utf-8") as src:
                for line in src:
                    if not line.strip():
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # A valid JSON line need not be an object (e.g. "[1, 2]").
                    if isinstance(data, dict) and data.get("trace_id") == trace_id:
                        matched.append(data)
        except OSError:
            return []

        return matched

    def _extract_final_output(
        self,
        callback_messages: Dict[str, Any],
        observations: List[Dict[str, Any]],
    ) -> Any:
        """Derive the final output from callbacks or observations.

        Priority: the latest ``agent_final_response`` observation with an
        output, then the latest observation of type ``agent`` with an output,
        then the payload of the last captured send callback. Returns ``None``
        when none of these is available.
        """
        for observation in reversed(observations):
            if observation.get("name") == "agent_final_response" and observation.get("output") is not None:
                return observation.get("output")

        for observation in reversed(observations):
            if observation.get("type") == "agent" and observation.get("output") is not None:
                return observation.get("output")

        send_messages = callback_messages.get("send") if callback_messages else None
        if send_messages:
            last_args, last_kwargs = send_messages[-1]
            return self._extract_payload(last_args, last_kwargs)

        return None

    @staticmethod
    def _extract_payload(args: Sequence[Any], kwargs: Dict[str, Any]) -> Any:
        """Reduce a captured callback call to a single payload value.

        Keyword arguments win when present; otherwise a single positional
        argument is returned as-is, several as a list, and none as ``None``.
        """
        if kwargs:
            return kwargs

        if not args:
            return None

        if len(args) == 1:
            return args[0]

        return list(args)

    @staticmethod
    def _serialize_callback(args: Sequence[Any], kwargs: Dict[str, Any]) -> Any:
        """Serialize callback arguments for JSON storage."""
        if len(args) == 1 and not kwargs:
            return args[0]
        return {
            "args": list(args),
            "kwargs": kwargs,
        }

    @staticmethod
    def _json_default(value: Any) -> str:
        """Fallback for ``json.dumps``: store non-serializable objects as their ``repr``.

        Agent outputs are arbitrary Python objects. Without a fallback, one
        exotic value would abort trace persistence after the entire
        experiment has already been executed.
        """
        return repr(value)

    def _save_results(self) -> None:
        """Save results to output directory.

        Always writes ``summary.json``; writes trace and observation files
        when ``config.save_traces`` is set, and ``errors.json`` when at least
        one run failed.
        """
        summary_file = self.output_dir / "summary.json"
        summary = {
            "name": self.config.name,
            "date": datetime.now().isoformat(),
            "config": self.config.to_dict(),
            "results": {
                "total_runs": self.results["total_runs"],
                "successful": self.results["successful"],
                "failed": self.results["failed"],
                "success_rate": self.results["success_rate"],
                "avg_duration_ms": self.results["avg_duration_ms"],
                "duration_seconds": self.results["duration_seconds"],
            },
        }
        # Explicit encoding, consistent with every other file operation here.
        summary_file.write_text(json.dumps(summary, indent=2), encoding="utf-8")

        if self.config.save_traces:
            self._save_trace_summary()
            self._save_experiment_observations()

        if self.results["errors"]:
            errors_file = self.output_dir / "errors.json"
            errors_file.write_text(json.dumps(self.results["errors"], indent=2), encoding="utf-8")

    def _save_trace_summary(self) -> None:
        """Persist detailed and summary trace information for the experiment.

        ``traces.jsonl`` receives every trace entry in full;
        ``trace_summary.jsonl`` receives a fixed subset of fields per trace.
        """
        full_traces_path = self.output_dir / "traces.jsonl"
        summary_path = self.output_dir / "trace_summary.jsonl"
        summary_fields = (
            "trace_id",
            "iteration",
            "persona",
            "input",
            "output",
            "duration_ms",
            "success",
        )

        with full_traces_path.open("w", encoding="utf-8") as full_file:
            for trace in self.results["traces"]:
                full_file.write(json.dumps(trace, default=self._json_default) + "\n")

        with summary_path.open("w", encoding="utf-8") as summary_file:
            for trace in self.results["traces"]:
                condensed = {field: trace.get(field) for field in summary_fields}
                summary_file.write(json.dumps(condensed, default=self._json_default) + "\n")

    def _save_experiment_observations(self) -> None:
        """Copy matching observations from the offline store into the experiment directory."""
        trace_ids = {
            trace["trace_id"]
            for trace in self.results["traces"]
            if trace.get("trace_id")
        }

        if not trace_ids:
            return

        source_path = self.offline_dir / "observations.jsonl"
        if not source_path.exists():
            console.print(
                f"[yellow]⚠️ Observations file not found: {source_path}[/yellow]"
            )
            return

        destination = self.output_dir / "observations.jsonl"
        copied = 0

        with source_path.open("r", encoding="utf-8") as src, destination.open(
            "w", encoding="utf-8"
        ) as dst:
            for line in src:
                if not line.strip():
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # Skip JSON values that are not objects; they carry no trace_id.
                if isinstance(record, dict) and record.get("trace_id") in trace_ids:
                    dst.write(json.dumps(record) + "\n")
                    copied += 1

        console.print(
            f"[green]✅ Saved {copied} observations to {destination.name}[/green]"
        )
582
+
583
+
584
class SingleRunner:
    """Runner for single agent executions."""

    def __init__(
        self,
        module_path: str,
        function_name: str = "run",
        trace_name: Optional[str] = None,
        no_collector: bool = False,
    ):
        """
        Initialize single runner.

        Args:
            module_path: Module path to agent
            function_name: Function to call
            trace_name: Name for the trace; defaults to ``single_<module_path>``
            no_collector: If True, disable collector
        """
        self.module_path = module_path
        self.function_name = function_name
        self.trace_name = trace_name or f"single_{module_path}"

        if no_collector:
            fluxloop.configure(enabled=False)

    def _load_agent(self) -> Callable:
        """Import ``module_path`` and return its ``function_name`` attribute.

        Raises:
            RuntimeError: If the module cannot be imported or lacks the
                attribute; the original exception is chained as the cause.
        """
        # Imported locally because the module never imports importlib at file
        # level; without this every run fails with an uncaught NameError.
        import importlib

        try:
            module = importlib.import_module(self.module_path)
            return getattr(module, self.function_name)
        except (ImportError, AttributeError) as e:
            raise RuntimeError(f"Failed to load agent: {e}") from e

    async def run(self, input_text: str) -> Any:
        """
        Run the agent once.

        Args:
            input_text: Input for the agent

        Returns:
            Agent output

        Raises:
            RuntimeError: If the agent function cannot be loaded.
        """
        agent_func = self._load_agent()

        # Run with instrumentation
        with fluxloop.instrument(self.trace_name):
            if asyncio.iscoroutinefunction(agent_func):
                return await agent_func(input_text)

            # Sync agents run in the default executor so the event loop is not
            # blocked. get_running_loop() is the supported call inside a
            # coroutine; get_event_loop() is deprecated for this use.
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, agent_func, input_text)