fluxloop-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of fluxloop-cli might be problematic.
- fluxloop_cli/__init__.py +9 -0
- fluxloop_cli/arg_binder.py +219 -0
- fluxloop_cli/commands/__init__.py +5 -0
- fluxloop_cli/commands/config.py +355 -0
- fluxloop_cli/commands/generate.py +304 -0
- fluxloop_cli/commands/init.py +225 -0
- fluxloop_cli/commands/parse.py +293 -0
- fluxloop_cli/commands/run.py +310 -0
- fluxloop_cli/commands/status.py +227 -0
- fluxloop_cli/config_loader.py +159 -0
- fluxloop_cli/constants.py +12 -0
- fluxloop_cli/input_generator.py +158 -0
- fluxloop_cli/llm_generator.py +417 -0
- fluxloop_cli/main.py +97 -0
- fluxloop_cli/project_paths.py +80 -0
- fluxloop_cli/runner.py +634 -0
- fluxloop_cli/target_loader.py +95 -0
- fluxloop_cli/templates.py +277 -0
- fluxloop_cli/validators.py +31 -0
- fluxloop_cli-0.1.0.dist-info/METADATA +86 -0
- fluxloop_cli-0.1.0.dist-info/RECORD +24 -0
- fluxloop_cli-0.1.0.dist-info/WHEEL +5 -0
- fluxloop_cli-0.1.0.dist-info/entry_points.txt +2 -0
- fluxloop_cli-0.1.0.dist-info/top_level.txt +1 -0
fluxloop_cli/commands/parse.py
@@ -0,0 +1,293 @@
"""Parse command for generating human-readable experiment artifacts."""

from __future__ import annotations

import json
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Literal, Optional

import typer
from rich.console import Console

console = Console()
app = typer.Typer()


@dataclass
class Observation:
    """A single observation entry parsed from observations.jsonl."""

    trace_id: str
    type: str
    name: Optional[str]
    start_time: Optional[str]
    end_time: Optional[str]
    level: Optional[str]
    input: Optional[dict]
    output: Optional[dict]
    raw: dict

    @property
    def duration_ms(self) -> Optional[float]:
        """Return duration in milliseconds if timestamps are available."""

        if not self.start_time or not self.end_time:
            return None

        try:
            start = datetime.fromisoformat(self.start_time.replace("Z", "+00:00"))
            end = datetime.fromisoformat(self.end_time.replace("Z", "+00:00"))
            return (end - start).total_seconds() * 1000
        except ValueError:
            return None


@dataclass
class TraceSummary:
    """Reduced structure for entries inside trace_summary.jsonl."""

    trace_id: str
    iteration: int
    persona: Optional[str]
    input_text: str
    output_text: Optional[str]
    duration_ms: float
    success: bool
    raw: dict


def _load_observations(path: Path) -> Dict[str, List[Observation]]:
    """Load observations grouped by trace_id."""

    grouped: Dict[str, List[Observation]] = defaultdict(list)
    if not path.exists():
        raise FileNotFoundError(f"observations.jsonl not found at {path}")

    with path.open("r", encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON in observations.jsonl at line {line_no}: {exc}"
                ) from exc

            trace_id = payload.get("trace_id")
            if not trace_id:
                # Observations without trace are not relevant for per-trace visualization
                continue

            grouped[trace_id].append(
                Observation(
                    trace_id=trace_id,
                    type=payload.get("type", "unknown"),
                    name=payload.get("name"),
                    start_time=payload.get("start_time"),
                    end_time=payload.get("end_time"),
                    level=payload.get("level"),
                    input=payload.get("input"),
                    output=payload.get("output"),
                    raw=payload,
                )
            )

    return grouped


def _load_trace_summaries(path: Path) -> Iterable[TraceSummary]:
    """Yield trace summaries from trace_summary.jsonl."""

    if not path.exists():
        raise FileNotFoundError(f"trace_summary.jsonl not found at {path}")

    with path.open("r", encoding="utf-8") as handle:
        for line_no, line in enumerate(handle, start=1):
            line = line.strip()
            if not line:
                continue

            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                raise ValueError(
                    f"Invalid JSON in trace_summary.jsonl at line {line_no}: {exc}"
                ) from exc

            trace_id = payload.get("trace_id")
            if not trace_id:
                continue

            yield TraceSummary(
                trace_id=trace_id,
                iteration=payload.get("iteration", 0),
                persona=payload.get("persona"),
                input_text=payload.get("input", ""),
                output_text=payload.get("output"),
                duration_ms=payload.get("duration_ms", 0.0),
                success=payload.get("success", False),
                raw=payload,
            )


def _format_json_block(data: Optional[dict], *, indent: int = 2) -> str:
    """Render a JSON dictionary as a fenced code block."""

    if data is None:
        return "(no data)"

    try:
        return "```json\n" + json.dumps(data, indent=indent, ensure_ascii=False) + "\n```"
    except (TypeError, ValueError):
        # Fallback to raw repr when data contains non-serializable content
        return "```\n" + repr(data) + "\n```"


def _format_markdown(
    trace: TraceSummary,
    observations: List[Observation],
) -> str:
    """Create markdown visualization for a single trace."""

    observations_sorted = sorted(
        observations,
        key=lambda obs: (obs.start_time or "", obs.end_time or ""),
    )

    header = (
        "---\n"
        f"trace_id: \"{trace.trace_id}\"\n"
        f"iteration: {trace.iteration}\n"
        f"persona: {json.dumps(trace.persona) if trace.persona else 'null'}\n"
        f"duration_ms: {trace.duration_ms:.2f}\n"
        f"success: {'true' if trace.success else 'false'}\n"
        "---\n\n"
    )

    summary_section = (
        "# Trace Analysis\n\n"
        "## Summary\n"
        f"- Trace ID: `{trace.trace_id}`\n"
        f"- Iteration: `{trace.iteration}`\n"
        f"- Persona: `{trace.persona or 'N/A'}`\n"
        f"- Duration: `{trace.duration_ms:.2f} ms`\n"
        f"- Success: `{trace.success}`\n"
        "\n"
        "### Input\n"
        f"{_format_json_block({'input': trace.input_text})}\n\n"
        "### Output\n"
        f"{_format_json_block({'output': trace.output_text})}\n\n"
    )

    timeline_lines = ["## Timeline\n"]

    for index, obs in enumerate(observations_sorted, start=1):
        duration = obs.duration_ms
        duration_str = f"{duration:.2f} ms" if duration is not None else "N/A"
        start = obs.start_time or "N/A"
        end = obs.end_time or "N/A"
        timeline_lines.append(
            "---\n"
            f"### Step {index}: [{obs.type}] {obs.name or 'unknown'}\n"
            f"- Start: `{start}`\n"
            f"- End: `{end}`\n"
            f"- Duration: `{duration_str}`\n"
            f"- Level: `{obs.level or 'N/A'}`\n"
            "\n"
            "**Input**\n"
            f"{_format_json_block(obs.input)}\n\n"
            "**Output**\n"
            f"{_format_json_block(obs.output)}\n\n"
        )

    if not observations_sorted:
        timeline_lines.append("(no observations recorded)\n")

    return header + summary_section + "".join(timeline_lines)


def _slugify(name: str) -> str:
    """Create a filesystem-safe slug from a trace identifier."""

    return "".join(c if c.isalnum() or c in {"-", "_"} else "-" for c in name)


def _ensure_experiment_dir(path: Path) -> Path:
    if not path.is_dir():
        raise typer.BadParameter(f"Experiment directory not found: {path}")
    return path


@app.command()
def experiment(
    experiment_dir: Path = typer.Argument(..., help="Path to the experiment output directory"),
    output: Path = typer.Option(
        Path("per_trace_analysis"),
        "--output",
        "-o",
        help="Directory name (relative to experiment_dir) to store parsed files",
    ),
    fmt: Literal["md"] = typer.Option(
        "md",
        "--format",
        "-f",
        help="Output format (currently only 'md' supported)",
    ),
    overwrite: bool = typer.Option(
        False,
        "--overwrite",
        help="Overwrite the output directory if it already exists",
    ),
):
    """Parse experiment artifacts into readable per-trace files."""

    if fmt != "md":
        raise typer.BadParameter("Only 'md' format is currently supported")

    experiment_dir = _ensure_experiment_dir(experiment_dir)
    output_dir = experiment_dir / output

    if output_dir.exists():
        if not overwrite:
            raise typer.BadParameter(
                f"Output directory already exists: {output_dir}. Use --overwrite to replace."
            )
    else:
        output_dir.mkdir(parents=True, exist_ok=True)

    console.print(
        f"📂 Loading experiment from: [cyan]{experiment_dir.resolve()}[/cyan]"
    )

    observations_path = experiment_dir / "observations.jsonl"
    trace_summary_path = experiment_dir / "trace_summary.jsonl"

    observations = _load_observations(observations_path)
    summaries = list(_load_trace_summaries(trace_summary_path))

    if not summaries:
        console.print("[yellow]No trace summaries found. Nothing to parse.[/yellow]")
        raise typer.Exit(0)

    console.print(
        f"📄 Found {len(summaries)} trace summaries. Generating markdown..."
    )

    for summary in summaries:
        trace_observations = observations.get(summary.trace_id, [])
        content = _format_markdown(summary, trace_observations)
        file_name = f"{summary.iteration:02d}_{_slugify(summary.trace_id)}.{fmt}"
        target_path = output_dir / file_name
        target_path.write_text(content, encoding="utf-8")

    console.print(
        f"✅ Generated {len(summaries)} files in: [green]{output_dir.resolve()}[/green]"
    )
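For orientation, here is a minimal sketch (not part of the package) of how the pieces in parse.py fit together: it builds one Observation and one TraceSummary by hand and renders the per-trace markdown with _format_markdown. The import path is assumed from the file listing above, and all field values are invented.

# Illustrative sketch only -- not shipped with fluxloop-cli.
# Import path assumed from the file listing; field values are invented.
from fluxloop_cli.commands.parse import Observation, TraceSummary, _format_markdown

obs = Observation(
    trace_id="trace-1",
    type="generation",
    name="llm_call",
    start_time="2024-01-01T00:00:00Z",
    end_time="2024-01-01T00:00:01.250Z",
    level="DEFAULT",
    input={"prompt": "hello"},
    output={"text": "hi there"},
    raw={},
)

summary = TraceSummary(
    trace_id="trace-1",
    iteration=1,
    persona="curious_user",
    input_text="hello",
    output_text="hi there",
    duration_ms=1250.0,
    success=True,
    raw={},
)

# Renders the YAML front matter, summary section, and timeline for one trace.
print(_format_markdown(summary, [obs]))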
fluxloop_cli/commands/run.py
@@ -0,0 +1,310 @@
"""
Run command for executing experiments and simulations.
"""

import asyncio
import sys
from pathlib import Path
from typing import Optional

import typer
from rich.console import Console
from rich.live import Live
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
from rich.table import Table

from ..runner import ExperimentRunner
from ..config_loader import load_experiment_config
from ..constants import DEFAULT_CONFIG_PATH, DEFAULT_ROOT_DIR_NAME
from ..project_paths import (
    resolve_config_path,
    resolve_project_relative,
)

app = typer.Typer()
console = Console()


@app.command()
def experiment(
    config_file: Path = typer.Option(
        DEFAULT_CONFIG_PATH,
        "--config",
        "-c",
        help="Path to experiment configuration file",
    ),
    project: Optional[str] = typer.Option(
        None,
        "--project",
        help="Project name under the FluxLoop root",
    ),
    root: Path = typer.Option(
        Path(DEFAULT_ROOT_DIR_NAME),
        "--root",
        help="FluxLoop root directory",
    ),
    iterations: Optional[int] = typer.Option(
        None,
        "--iterations",
        "-i",
        help="Override number of iterations",
    ),
    personas: Optional[str] = typer.Option(
        None,
        "--personas",
        "-p",
        help="Comma-separated list of personas to use",
    ),
    output_dir: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Override output directory",
    ),
    no_collector: bool = typer.Option(
        False,
        "--no-collector",
        help="Run without sending data to collector",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be run without executing",
    ),
):
    """
    Run an experiment based on configuration file.

    This command:
    - Loads experiment configuration
    - Generates prompt variations
    - Runs agent with each variation
    - Collects traces and metrics
    - Generates summary report
    """
    # Check if config file exists
    resolved_config = resolve_config_path(config_file, project, root)
    if not resolved_config.exists():
        console.print(f"[red]Error:[/red] Configuration file not found: {config_file}")
        console.print("\nRun [cyan]fluxloop init project[/cyan] to create a configuration file.")
        raise typer.Exit(1)

    # Load configuration
    console.print(f"📄 Loading configuration from: [cyan]{resolved_config}[/cyan]")

    try:
        config = load_experiment_config(resolved_config)
    except Exception as e:
        console.print(f"[red]Error loading configuration:[/red] {e}")
        raise typer.Exit(1)

    # Override configuration if needed
    if iterations is not None:
        config.iterations = iterations

    if personas:
        persona_list = [p.strip() for p in personas.split(",")]
        config.personas = [p for p in config.personas if p.name in persona_list]

    if output_dir:
        resolved_output = resolve_project_relative(output_dir, project, root)
        config.output_directory = str(resolved_output)

    # Load inputs to ensure accurate counts before showing the summary
    try:
        runner = ExperimentRunner(config, no_collector=no_collector)
        loaded_inputs = asyncio.run(runner._load_inputs())  # type: ignore[attr-defined]
    except Exception as e:
        console.print(f"[red]Error preparing inputs:[/red] {e}")
        raise typer.Exit(1)

    config.set_resolved_input_count(len(loaded_inputs))
    total_runs = config.estimate_total_runs()

    summary = Table(title="Experiment Summary", show_header=False)
    summary.add_column("Property", style="cyan")
    summary.add_column("Value", style="white")

    summary.add_row("Name", config.name)
    summary.add_row("Iterations", str(config.iterations))
    summary.add_row("Personas", str(len(config.personas)))
    summary.add_row(
        "Input Source",
        "external file" if config.has_external_inputs() else "base_inputs",
    )
    summary.add_row("Total Runs", str(total_runs))
    summary.add_row("Output", config.output_directory)

    console.print(summary)

    if dry_run:
        console.print("\n[yellow]Dry run mode - no execution will occur[/yellow]")
        return

    # Confirm execution
    if total_runs > 100:
        console.print(
            f"\n[yellow]Warning:[/yellow] This will execute {total_runs} runs. "
            "This may take a while and incur API costs."
        )
    else:
        console.print(
            f"\nThis will execute {total_runs} runs."
        )

    if not typer.confirm("Continue?"):
        raise typer.Abort()

    # Create runner
    # Run experiment with progress tracking
    console.print("\n[bold green]▶️ Starting experiment...[/bold green]\n")

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeRemainingColumn(),
        TextColumn("({task.completed} of {task.total})"),
        console=console,
    ) as progress:
        # Create main task
        main_task = progress.add_task(
            f"Running {config.name}",
            total=total_runs,
        )

        def _progress_callback():
            progress.advance(main_task)

        # Run experiment
        try:
            results = asyncio.run(
                runner.run_experiment(
                    progress_callback=_progress_callback
                )
            )
        except KeyboardInterrupt:
            console.print("\n[yellow]Experiment interrupted by user[/yellow]")
            raise typer.Exit(1)
        except Exception as e:
            console.print(f"\n[red]Error during experiment:[/red] {e}")
            # Debug mode - show full traceback
            if "--debug" in sys.argv:
                console.print_exception()
            raise typer.Exit(1)

    config.set_resolved_input_count(results.get("input_count", config.get_input_count()))

    input_total_runs = config.estimate_total_runs()

    if input_total_runs != total_runs:
        console.print(
            f"\n[yellow]Notice:[/yellow] Effective total runs adjusted to {input_total_runs} "
            "after loading inputs."
        )

    _display_results(results)


@app.command()
def single(
    agent_path: str = typer.Argument(
        ...,
        help="Module path to agent (e.g., my_agent.main)",
    ),
    input_text: str = typer.Argument(
        ...,
        help="Input text for the agent",
    ),
    function_name: str = typer.Option(
        "run",
        "--function",
        "-f",
        help="Function name to call",
    ),
    trace_name: Optional[str] = typer.Option(
        None,
        "--trace-name",
        help="Name for the trace",
    ),
    no_collector: bool = typer.Option(
        False,
        "--no-collector",
        help="Run without sending data to collector",
    ),
):
    """
    Run a single agent execution.

    Quick way to test an agent without a full experiment configuration.
    """
    console.print(f"🤖 Running agent: [cyan]{agent_path}.{function_name}[/cyan]")
    console.print(f"📝 Input: {input_text[:100]}{'...' if len(input_text) > 100 else ''}")

    # Create minimal runner
    from ..runner import SingleRunner

    runner = SingleRunner(
        module_path=agent_path,
        function_name=function_name,
        trace_name=trace_name or f"single_{agent_path}",
        no_collector=no_collector,
    )

    # Run agent
    console.print("\n[bold]Executing...[/bold]\n")

    try:
        result = asyncio.run(runner.run(input_text))

        console.print(Panel(
            str(result),
            title="[bold green]Result[/bold green]",
            border_style="green",
        ))

    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        # Debug mode - show full traceback
        if "--debug" in sys.argv:
            console.print_exception()
        raise typer.Exit(1)


def _display_results(results: dict):
    """Display experiment results."""
    console.print("\n" + "="*50)
    console.print("[bold green]Experiment Complete![/bold green]")
    console.print("="*50 + "\n")

    # Create results table
    table = Table(title="Results Summary")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="white")

    # Add metrics
    table.add_row("Total Runs", str(results.get("total_runs", 0)))
    table.add_row("Successful", str(results.get("successful", 0)))
    table.add_row("Failed", str(results.get("failed", 0)))

    success_rate = results.get("success_rate", 0) * 100
    table.add_row("Success Rate", f"{success_rate:.1f}%")

    avg_duration = results.get("avg_duration_ms", 0)
    table.add_row("Avg Duration", f"{avg_duration:.0f}ms")

    console.print(table)

    # Show output location
    if results.get("output_dir"):
        console.print(f"\n📁 Results saved to: [cyan]{results['output_dir']}[/cyan]")

    # Show trace URLs if available
    if results.get("trace_urls"):
        console.print("\n🔍 View traces:")
        for url in results["trace_urls"][:5]:  # Show first 5
            console.print(f"  {url}")
        if len(results["trace_urls"]) > 5:
            console.print(f"  ... and {len(results['trace_urls']) - 5} more")