fluxloop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fluxloop-cli might be problematic. Click here for more details.

@@ -0,0 +1,293 @@
1
+ """Parse command for generating human-readable experiment artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections import defaultdict
7
+ from dataclasses import dataclass
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Dict, Iterable, List, Literal, Optional
11
+
12
+ import typer
13
+ from rich.console import Console
14
+
15
+ console = Console()
16
+ app = typer.Typer()
17
+
18
+
19
@dataclass
class Observation:
    """A single observation entry parsed from observations.jsonl.

    Field values are stored exactly as they were read from the decoded JSON
    line; ``raw`` keeps the complete decoded payload.
    """

    trace_id: str
    type: str
    name: Optional[str]
    start_time: Optional[str]
    end_time: Optional[str]
    level: Optional[str]
    input: Optional[dict]
    output: Optional[dict]
    raw: dict

    @property
    def duration_ms(self) -> Optional[float]:
        """Return the elapsed time between start and end in milliseconds.

        Returns:
            ``end_time - start_time`` in milliseconds, or ``None`` when either
            timestamp is missing, cannot be parsed as an ISO-8601 string, or
            the two values cannot be subtracted from each other.
        """
        if not self.start_time or not self.end_time:
            return None

        try:
            # A trailing "Z" is rewritten to an explicit UTC offset so that
            # datetime.fromisoformat() accepts it on older interpreters.
            start = datetime.fromisoformat(self.start_time.replace("Z", "+00:00"))
            end = datetime.fromisoformat(self.end_time.replace("Z", "+00:00"))
            return (end - start).total_seconds() * 1000
        except (ValueError, TypeError, AttributeError):
            # ValueError: malformed timestamp text.
            # TypeError: one timestamp carries an offset and the other does not.
            # AttributeError: the payload stored a non-string timestamp.
            return None
46
+
47
+
48
@dataclass
class TraceSummary:
    """One record of trace_summary.jsonl, reduced to the fields used for reporting."""

    # Identifier used to match the summary with its observations.
    trace_id: str
    # Iteration value taken from the summary record.
    iteration: int
    # Persona value from the record, if one was present.
    persona: Optional[str]
    # Value of the record's "input" key.
    input_text: str
    # Value of the record's "output" key, if any.
    output_text: Optional[str]
    # Duration reported by the record, in milliseconds.
    duration_ms: float
    # Success flag reported by the record.
    success: bool
    # The complete decoded JSON record.
    raw: dict
60
+
61
+
62
def _load_observations(path: Path) -> Dict[str, List[Observation]]:
    """Load observations from a JSONL file, grouped by ``trace_id``.

    Blank lines and entries without a truthy ``trace_id`` are skipped.

    Args:
        path: Location of ``observations.jsonl``.

    Returns:
        Mapping from trace id to its observations, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If a non-blank line is not valid JSON, or decodes to
            something other than a JSON object.
    """
    grouped: Dict[str, List[Observation]] = defaultdict(list)
    if not path.exists():
        raise FileNotFoundError(f"observations.jsonl not found at {path}")

    with path.open("r", encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON in observations.jsonl at line {line_no}: {exc}"
                ) from exc

            # Valid JSON that is not an object (list, string, number, ...)
            # has no .get(); report it with its line number instead of
            # failing later with an AttributeError.
            if not isinstance(payload, dict):
                raise ValueError(
                    f"Invalid entry in observations.jsonl at line {line_no}: "
                    f"expected a JSON object, got {type(payload).__name__}"
                )

            trace_id = payload.get("trace_id")
            if not trace_id:
                # Observations without trace are not relevant for per-trace visualization
                continue

            grouped[trace_id].append(
                Observation(
                    trace_id=trace_id,
                    type=payload.get("type", "unknown"),
                    name=payload.get("name"),
                    start_time=payload.get("start_time"),
                    end_time=payload.get("end_time"),
                    level=payload.get("level"),
                    input=payload.get("input"),
                    output=payload.get("output"),
                    raw=payload,
                )
            )

    return grouped
102
+
103
+
104
def _load_trace_summaries(path: Path) -> Iterable[TraceSummary]:
    """Yield trace summaries from trace_summary.jsonl.

    Blank lines and records without a truthy ``trace_id`` are skipped. Because
    this is a generator, nothing is checked or read until it is iterated.

    Args:
        path: Location of ``trace_summary.jsonl``.

    Yields:
        One ``TraceSummary`` per usable record, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If a non-blank line is not valid JSON, or decodes to
            something other than a JSON object.
    """
    if not path.exists():
        raise FileNotFoundError(f"trace_summary.jsonl not found at {path}")

    with path.open("r", encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON in trace_summary.jsonl at line {line_no}: {exc}"
                ) from exc

            # Valid JSON that is not an object has no .get(); report it with
            # its line number instead of failing with an AttributeError.
            if not isinstance(payload, dict):
                raise ValueError(
                    f"Invalid entry in trace_summary.jsonl at line {line_no}: "
                    f"expected a JSON object, got {type(payload).__name__}"
                )

            trace_id = payload.get("trace_id")
            if not trace_id:
                continue

            # dict.get() only applies its default when the key is absent. An
            # explicit JSON null would otherwise put None into fields that are
            # declared non-optional, so null is treated like a missing key.
            iteration = payload.get("iteration")
            input_text = payload.get("input")
            duration_ms = payload.get("duration_ms")
            success = payload.get("success")

            yield TraceSummary(
                trace_id=trace_id,
                iteration=0 if iteration is None else iteration,
                persona=payload.get("persona"),
                input_text="" if input_text is None else input_text,
                output_text=payload.get("output"),
                duration_ms=0.0 if duration_ms is None else duration_ms,
                success=False if success is None else success,
                raw=payload,
            )
137
+
138
+
139
def _format_json_block(data: Optional[dict], *, indent: int = 2) -> str:
    """Return ``data`` as a fenced Markdown code block.

    Args:
        data: Value to render; ``None`` produces a placeholder instead of a block.
        indent: Indentation width handed to ``json.dumps``.

    Returns:
        ``"(no data)"`` for ``None``, a ``json`` fenced block when the value can
        be serialized, otherwise an untagged fenced block holding ``repr(data)``.
    """
    if data is None:
        return "(no data)"

    fence = "```"
    try:
        rendered = json.dumps(data, indent=indent, ensure_ascii=False)
    except (TypeError, ValueError):
        # The value holds something json cannot encode; show its repr instead.
        return f"{fence}\n{data!r}\n{fence}"
    return f"{fence}json\n{rendered}\n{fence}"
150
+
151
+
152
def _format_markdown(
    trace: TraceSummary,
    observations: List[Observation],
) -> str:
    """Create markdown visualization for a single trace.

    The document consists of a YAML front-matter header, a summary section
    with the trace input and output, and a timeline with one step per
    observation.

    Args:
        trace: Summary record for the trace being rendered.
        observations: Observations belonging to the trace, in any order.

    Returns:
        The complete Markdown document as a single string.
    """
    # Timestamps are compared as strings; missing values sort first.
    observations_sorted = sorted(
        observations,
        key=lambda obs: (obs.start_time or "", obs.end_time or ""),
    )

    # json.dumps produces a double-quoted scalar with quotes and backslashes
    # escaped, so an id containing such characters cannot break the front
    # matter. This matches how persona is encoded below.
    trace_id_yaml = json.dumps(str(trace.trace_id), ensure_ascii=False)

    header = (
        "---\n"
        f"trace_id: {trace_id_yaml}\n"
        f"iteration: {trace.iteration}\n"
        f"persona: {json.dumps(trace.persona) if trace.persona else 'null'}\n"
        f"duration_ms: {trace.duration_ms:.2f}\n"
        f"success: {'true' if trace.success else 'false'}\n"
        "---\n\n"
    )

    summary_section = (
        "# Trace Analysis\n\n"
        "## Summary\n"
        f"- Trace ID: `{trace.trace_id}`\n"
        f"- Iteration: `{trace.iteration}`\n"
        f"- Persona: `{trace.persona or 'N/A'}`\n"
        f"- Duration: `{trace.duration_ms:.2f} ms`\n"
        f"- Success: `{trace.success}`\n"
        "\n"
        "### Input\n"
        f"{_format_json_block({'input': trace.input_text})}\n\n"
        "### Output\n"
        f"{_format_json_block({'output': trace.output_text})}\n\n"
    )

    timeline_lines = ["## Timeline\n"]

    for index, obs in enumerate(observations_sorted, start=1):
        duration = obs.duration_ms
        duration_str = f"{duration:.2f} ms" if duration is not None else "N/A"
        start = obs.start_time or "N/A"
        end = obs.end_time or "N/A"
        timeline_lines.append(
            "---\n"
            f"### Step {index}: [{obs.type}] {obs.name or 'unknown'}\n"
            f"- Start: `{start}`\n"
            f"- End: `{end}`\n"
            f"- Duration: `{duration_str}`\n"
            f"- Level: `{obs.level or 'N/A'}`\n"
            "\n"
            "**Input**\n"
            f"{_format_json_block(obs.input)}\n\n"
            "**Output**\n"
            f"{_format_json_block(obs.output)}\n\n"
        )

    if not observations_sorted:
        timeline_lines.append("(no observations recorded)\n")

    return header + summary_section + "".join(timeline_lines)
213
+
214
+
215
def _slugify(name: str) -> str:
    """Turn a trace identifier into a string that is safe to use in a file name.

    Alphanumeric characters, ``-`` and ``_`` are kept; every other character
    is replaced by ``-``.
    """
    allowed_punctuation = ("-", "_")
    pieces = []
    for char in name:
        keep = char.isalnum() or char in allowed_punctuation
        pieces.append(char if keep else "-")
    return "".join(pieces)
219
+
220
+
221
def _ensure_experiment_dir(path: Path) -> Path:
    """Return ``path`` unchanged when it is an existing directory.

    Raises:
        typer.BadParameter: If ``path`` is not a directory.
    """
    if path.is_dir():
        return path
    raise typer.BadParameter(f"Experiment directory not found: {path}")
225
+
226
+
227
@app.command()
def experiment(
    experiment_dir: Path = typer.Argument(..., help="Path to the experiment output directory"),
    output: Path = typer.Option(
        Path("per_trace_analysis"),
        "--output",
        "-o",
        help="Directory name (relative to experiment_dir) to store parsed files",
    ),
    fmt: Literal["md"] = typer.Option(
        "md",
        "--format",
        "-f",
        help="Output format (currently only 'md' supported)",
    ),
    overwrite: bool = typer.Option(
        False,
        "--overwrite",
        help="Overwrite the output directory if it already exists",
    ),
):
    """Parse experiment artifacts into readable per-trace files."""
    # NOTE: the docstring above is the CLI help text, so details live in comments.
    # Reads observations.jsonl and trace_summary.jsonl from experiment_dir and
    # writes one "<iteration>_<trace id>.md" file per trace summary.

    if fmt != "md":
        raise typer.BadParameter("Only 'md' format is currently supported")

    experiment_dir = _ensure_experiment_dir(experiment_dir)
    output_dir = experiment_dir / output

    # Validate the destination up front but create it only once there is
    # something to write. A failed or empty run then leaves no empty
    # directory behind to block the next invocation.
    if output_dir.exists():
        if not overwrite:
            raise typer.BadParameter(
                f"Output directory already exists: {output_dir}. Use --overwrite to replace."
            )
        if not output_dir.is_dir():
            raise typer.BadParameter(
                f"Output path exists and is not a directory: {output_dir}"
            )

    console.print(
        f"📂 Loading experiment from: [cyan]{experiment_dir.resolve()}[/cyan]"
    )

    observations_path = experiment_dir / "observations.jsonl"
    trace_summary_path = experiment_dir / "trace_summary.jsonl"

    # Missing or malformed input files are user errors; report them as a
    # message and a non-zero exit code rather than a traceback.
    try:
        observations = _load_observations(observations_path)
        summaries = list(_load_trace_summaries(trace_summary_path))
    except (FileNotFoundError, ValueError) as exc:
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(1) from exc

    if not summaries:
        console.print("[yellow]No trace summaries found. Nothing to parse.[/yellow]")
        raise typer.Exit(0)

    console.print(
        f"📝 Found {len(summaries)} trace summaries. Generating markdown..."
    )

    # With --overwrite an existing directory is reused: files with the same
    # name are rewritten, other files in it are left untouched.
    output_dir.mkdir(parents=True, exist_ok=True)

    for summary in summaries:
        trace_observations = observations.get(summary.trace_id, [])
        content = _format_markdown(summary, trace_observations)
        file_name = f"{summary.iteration:02d}_{_slugify(summary.trace_id)}.{fmt}"
        target_path = output_dir / file_name
        target_path.write_text(content, encoding="utf-8")

    console.print(
        f"✅ Generated {len(summaries)} files in: [green]{output_dir.resolve()}[/green]"
    )
292
+
293
+
@@ -0,0 +1,310 @@
1
+ """
2
+ Run command for executing experiments and simulations.
3
+ """
4
+
5
+ import asyncio
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import typer
11
+ from rich.console import Console
12
+ from rich.live import Live
13
+ from rich.panel import Panel
14
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
15
+ from rich.table import Table
16
+
17
+ from ..runner import ExperimentRunner
18
+ from ..config_loader import load_experiment_config
19
+ from ..constants import DEFAULT_CONFIG_PATH, DEFAULT_ROOT_DIR_NAME
20
+ from ..project_paths import (
21
+ resolve_config_path,
22
+ resolve_project_relative,
23
+ )
24
+
25
+ app = typer.Typer()
26
+ console = Console()
27
+
28
+
29
@app.command()
def experiment(
    config_file: Path = typer.Option(
        DEFAULT_CONFIG_PATH,
        "--config",
        "-c",
        help="Path to experiment configuration file",
    ),
    project: Optional[str] = typer.Option(
        None,
        "--project",
        help="Project name under the FluxLoop root",
    ),
    root: Path = typer.Option(
        Path(DEFAULT_ROOT_DIR_NAME),
        "--root",
        help="FluxLoop root directory",
    ),
    iterations: Optional[int] = typer.Option(
        None,
        "--iterations",
        "-i",
        help="Override number of iterations",
    ),
    personas: Optional[str] = typer.Option(
        None,
        "--personas",
        "-p",
        help="Comma-separated list of personas to use",
    ),
    output_dir: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Override output directory",
    ),
    no_collector: bool = typer.Option(
        False,
        "--no-collector",
        help="Run without sending data to collector",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be run without executing",
    ),
):
    """
    Run an experiment based on configuration file.

    This command:
    - Loads experiment configuration
    - Generates prompt variations
    - Runs agent with each variation
    - Collects traces and metrics
    - Generates summary report
    """
    # Check if config file exists. The message reports the resolved path,
    # because that is the location that was actually looked up.
    resolved_config = resolve_config_path(config_file, project, root)
    if not resolved_config.exists():
        console.print(f"[red]Error:[/red] Configuration file not found: {resolved_config}")
        console.print("\nRun [cyan]fluxloop init project[/cyan] to create a configuration file.")
        raise typer.Exit(1)

    # Load configuration
    console.print(f"📋 Loading configuration from: [cyan]{resolved_config}[/cyan]")

    try:
        config = load_experiment_config(resolved_config)
    except Exception as e:
        console.print(f"[red]Error loading configuration:[/red] {e}")
        raise typer.Exit(1)

    # Override configuration if needed
    if iterations is not None:
        config.iterations = iterations

    if personas:
        requested = {name.strip() for name in personas.split(",") if name.strip()}
        config.personas = [p for p in config.personas if p.name in requested]
        # Names that match no configured persona are dropped by the filter
        # above; say so instead of silently running with fewer personas.
        unknown = requested - {p.name for p in config.personas}
        if unknown:
            console.print(
                "[yellow]Warning:[/yellow] Unknown personas ignored: "
                + ", ".join(sorted(unknown))
            )

    if output_dir:
        resolved_output = resolve_project_relative(output_dir, project, root)
        config.output_directory = str(resolved_output)

    # Load inputs to ensure accurate counts before showing the summary
    try:
        runner = ExperimentRunner(config, no_collector=no_collector)
        loaded_inputs = asyncio.run(runner._load_inputs())  # type: ignore[attr-defined]
    except Exception as e:
        console.print(f"[red]Error preparing inputs:[/red] {e}")
        raise typer.Exit(1)

    config.set_resolved_input_count(len(loaded_inputs))
    total_runs = config.estimate_total_runs()

    summary = Table(title="Experiment Summary", show_header=False)
    summary.add_column("Property", style="cyan")
    summary.add_column("Value", style="white")

    summary.add_row("Name", config.name)
    summary.add_row("Iterations", str(config.iterations))
    summary.add_row("Personas", str(len(config.personas)))
    summary.add_row(
        "Input Source",
        "external file" if config.has_external_inputs() else "base_inputs",
    )
    summary.add_row("Total Runs", str(total_runs))
    summary.add_row("Output", config.output_directory)

    console.print(summary)

    if dry_run:
        console.print("\n[yellow]Dry run mode - no execution will occur[/yellow]")
        return

    # Confirm execution
    if total_runs > 100:
        console.print(
            f"\n[yellow]Warning:[/yellow] This will execute {total_runs} runs. "
            "This may take a while and incur API costs."
        )
    else:
        console.print(
            f"\nThis will execute {total_runs} runs."
        )

    if not typer.confirm("Continue?"):
        raise typer.Abort()

    # Run experiment with progress tracking (the runner was created above)
    console.print("\n[bold green]▶️ Starting experiment...[/bold green]\n")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeRemainingColumn(),
        TextColumn("({task.completed} of {task.total})"),
        console=console,
    ) as progress:
        # Create main task
        main_task = progress.add_task(
            f"Running {config.name}",
            total=total_runs,
        )

        def _progress_callback():
            progress.advance(main_task)

        # Run experiment
        try:
            results = asyncio.run(
                runner.run_experiment(
                    progress_callback=_progress_callback
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Experiment interrupted by user[/yellow]")
            raise typer.Exit(1)
        except Exception as e:
            console.print(f"\n[red]Error during experiment:[/red] {e}")
            # Debug mode - show full traceback
            if "--debug" in sys.argv:
                console.print_exception()
            raise typer.Exit(1)

    # The runner may report a different input count than the pre-run estimate.
    config.set_resolved_input_count(results.get("input_count", config.get_input_count()))

    input_total_runs = config.estimate_total_runs()

    if input_total_runs != total_runs:
        console.print(
            f"\n[yellow]Notice:[/yellow] Effective total runs adjusted to {input_total_runs} "
            "after loading inputs."
        )

    _display_results(results)
209
+
210
+
211
@app.command()
def single(
    agent_path: str = typer.Argument(
        ...,
        help="Module path to agent (e.g., my_agent.main)",
    ),
    input_text: str = typer.Argument(
        ...,
        help="Input text for the agent",
    ),
    function_name: str = typer.Option(
        "run",
        "--function",
        "-f",
        help="Function name to call",
    ),
    trace_name: Optional[str] = typer.Option(
        None,
        "--trace-name",
        help="Name for the trace",
    ),
    no_collector: bool = typer.Option(
        False,
        "--no-collector",
        help="Run without sending data to collector",
    ),
):
    """
    Run a single agent execution.

    Quick way to test an agent without a full experiment configuration.
    """
    # Arbitrary text (user input, agent output, exception messages) is
    # escaped before it is embedded in Rich markup. Otherwise bracketed
    # text such as "[/x]" is parsed as a tag and can raise a MarkupError.
    from rich.markup import escape

    console.print(
        f"🤖 Running agent: [cyan]{escape(agent_path)}.{escape(function_name)}[/cyan]"
    )
    preview = input_text[:100] + ("..." if len(input_text) > 100 else "")
    console.print(f"📝 Input: {escape(preview)}")

    # Create minimal runner
    from ..runner import SingleRunner

    runner = SingleRunner(
        module_path=agent_path,
        function_name=function_name,
        trace_name=trace_name or f"single_{agent_path}",
        no_collector=no_collector,
    )

    # Run agent
    console.print("\n[bold]Executing...[/bold]\n")

    # Only the agent call is guarded, so a rendering problem is not
    # reported as an agent failure.
    try:
        result = asyncio.run(runner.run(input_text))
    except Exception as e:
        console.print(f"[red]Error:[/red] {escape(str(e))}")
        # Debug mode - show full traceback
        if "--debug" in sys.argv:
            console.print_exception()
        raise typer.Exit(1)

    console.print(Panel(
        escape(str(result)),
        title="[bold green]Result[/bold green]",
        border_style="green",
    ))
274
+
275
+
276
def _display_results(results: dict):
    """Display experiment results.

    Prints a banner, a table with run counts, success rate and average
    duration, then the output location and up to five trace URLs when the
    corresponding keys are present.

    Args:
        results: Result mapping; the keys read are ``total_runs``,
            ``successful``, ``failed``, ``success_rate``, ``avg_duration_ms``,
            ``output_dir`` and ``trace_urls``. Missing keys fall back to zero
            or are skipped.
    """
    max_urls_shown = 5

    console.print("\n" + "="*50)
    console.print("[bold green]Experiment Complete![/bold green]")
    console.print("="*50 + "\n")

    # Create results table
    table = Table(title="Results Summary")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="white")

    # Add metrics
    table.add_row("Total Runs", str(results.get("total_runs", 0)))
    table.add_row("Successful", str(results.get("successful", 0)))
    table.add_row("Failed", str(results.get("failed", 0)))

    # "or 0" also covers a key that is present but None, which would
    # otherwise fail on the multiplication or the numeric format.
    success_rate = (results.get("success_rate") or 0) * 100
    table.add_row("Success Rate", f"{success_rate:.1f}%")

    avg_duration = results.get("avg_duration_ms") or 0
    table.add_row("Avg Duration", f"{avg_duration:.0f}ms")

    console.print(table)

    # Show output location
    output_dir = results.get("output_dir")
    if output_dir:
        console.print(f"\n📁 Results saved to: [cyan]{output_dir}[/cyan]")

    # Show trace URLs if available
    trace_urls = results.get("trace_urls")
    if trace_urls:
        console.print("\n🔗 View traces:")
        for url in trace_urls[:max_urls_shown]:
            console.print(f"  {url}")
        if len(trace_urls) > max_urls_shown:
            console.print(f"  ... and {len(trace_urls) - max_urls_shown} more")