PyPI - buildlog - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

buildlog 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

buildlog/cli.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pathlib import Path
 import click
+from buildlog.core import get_rewards, log_reward
 from buildlog.distill import CATEGORIES, distill_all, format_output
 from buildlog.skills import format_skills, generate_skills
 from buildlog.stats import calculate_stats, format_dashboard, format_json
@@ -456,5 +457,424 @@ def skills(
         click.echo(formatted)
+@main.command()
+@click.argument("outcome", type=click.Choice(["accepted", "revision", "rejected"]))
+@click.option(
+    "--distance",
+    "-d",
+    # The help text and the docstring (reward=1-distance) both document a 0-1
+    # range, so enforce it at parse time: out-of-range values now fail with a
+    # Click usage error instead of being forwarded to log_reward().
+    type=click.FloatRange(0.0, 1.0),
+    help="Revision distance (0-1, 0=minor tweak, 1=complete redo)",
+)
+@click.option("--error-class", "-e", help="Category of error (e.g., missing_test)")
+@click.option("--notes", "-n", help="Additional notes about the feedback")
+@click.option("--rules", "-r", multiple=True, help="Active rule IDs")
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def reward(
+    outcome: str,
+    distance: float | None,
+    error_class: str | None,
+    notes: str | None,
+    rules: tuple[str, ...],
+    output_json: bool,
+):
+    """Log a reward signal for the learning loop.
+    Used to provide feedback on agent work for bandit learning.
+    OUTCOME is one of:
+      - accepted: Work was accepted as-is (reward=1.0)
+      - revision: Work needed changes (reward=1-distance)
+      - rejected: Work was rejected entirely (reward=0.0)
+    Examples:
+        buildlog reward accepted
+        buildlog reward revision --distance 0.3 --error-class missing_test
+        buildlog reward rejected --notes "Completely wrong approach"
+        buildlog reward accepted --rules arch-123 --rules wf-456
+    """
+    # NOTE: the docstring above is rendered by Click as the command's --help
+    # text, so it is kept verbatim; implementation notes live in comments.
+    #
+    # Parameters (all supplied by Click):
+    #   outcome      one of "accepted" / "revision" / "rejected"
+    #   distance     optional float, validated to lie in [0, 1]
+    #   error_class  optional free-form error category
+    #   notes        optional free-form notes
+    #   rules        zero or more rule IDs (repeatable --rules option)
+    #   output_json  print the result as JSON instead of plain text
+    # Exits with status 1 when no ./buildlog directory exists.
+    import json as json_module
+    from dataclasses import asdict
+    buildlog_dir = Path("buildlog")
+    if not buildlog_dir.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    result = log_reward(
+        buildlog_dir,
+        outcome=outcome,  # type: ignore[arg-type]
+        # An empty tuple (no --rules given) is forwarded as None, not [].
+        rules_active=list(rules) if rules else None,
+        revision_distance=distance,
+        error_class=error_class,
+        notes=notes,
+        source="cli",
+    )
+    if output_json:
+        # asdict() is applied to the result, so it is presumably a dataclass
+        # instance - confirm against buildlog.core.LogRewardResult.
+        click.echo(json_module.dumps(asdict(result), indent=2))
+    else:
+        click.echo(f"✓ {result.message}")
+        click.echo(f"  Reward ID: {result.reward_id}")
+        click.echo(f"  Total events: {result.total_events}")
+@main.command()
+@click.option("--limit", "-n", type=int, help="Limit number of events to show")
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def rewards(limit: int | None, output_json: bool):
+    """List reward events and summary statistics.
+    Shows recent reward events and aggregate statistics useful for
+    tracking learning progress.
+    Examples:
+        buildlog rewards              # Show all with summary
+        buildlog rewards --limit 10   # Show 10 most recent
+        buildlog rewards --json       # JSON output for scripts
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    stats = get_rewards(root, limit=limit)
+    # Machine-readable mode: emit a single JSON document and stop.
+    if output_json:
+        payload = {
+            "total_events": stats.total_events,
+            "accepted": stats.accepted,
+            "revisions": stats.revisions,
+            "rejected": stats.rejected,
+            "mean_reward": stats.mean_reward,
+            "events": [item.to_dict() for item in stats.events],
+        }
+        click.echo(json_module.dumps(payload, indent=2))
+        return
+    # Human-readable mode: aggregate counters first ...
+    click.echo("Reward Signal Summary")
+    click.echo("=" * 40)
+    click.echo(f"Total events:  {stats.total_events}")
+    click.echo(f"  Accepted:    {stats.accepted}")
+    click.echo(f"  Revisions:   {stats.revisions}")
+    click.echo(f"  Rejected:    {stats.rejected}")
+    click.echo(f"Mean reward:   {stats.mean_reward:.3f}")
+    click.echo()
+    # ... then either a getting-started hint or the per-event listing.
+    if not stats.events:
+        click.echo("No reward events yet.")
+        click.echo("Log your first with: buildlog reward accepted")
+        return
+    click.echo("Recent Events")
+    click.echo("-" * 40)
+    for item in stats.events:
+        stamp = item.timestamp.strftime("%Y-%m-%d %H:%M")
+        click.echo(f"  [{stamp}] {item.outcome.upper()} (r={item.reward_value:.2f})")
+        # Optional detail lines are printed only when the field is non-empty.
+        if item.error_class:
+            click.echo(f"           error_class: {item.error_class}")
+        if item.notes:
+            click.echo(f"           notes: {item.notes}")
+# -----------------------------------------------------------------------------
+# Experiment Commands (Session Tracking for Issue #21)
+# -----------------------------------------------------------------------------
+@main.group()
+def experiment():
+    """Commands for running learning experiments.
+    Track sessions, log mistakes, and measure repeated-mistake rates
+    to evaluate buildlog's effectiveness.
+    Example workflow:
+        buildlog experiment start --error-class missing_test
+        # ... do work, log mistakes as you encounter them ...
+        buildlog experiment log-mistake --class missing_test --description "..."
+        buildlog experiment end
+        buildlog experiment report
+    """
+    # Container group only: the subcommands registered with
+    # @experiment.command(...) do the work. The docstring is the body and is
+    # kept verbatim because Click shows it as the group's --help text.
+@experiment.command("start")
+@click.option(
+    "--error-class",
+    "-e",
+    help="Error class being targeted (e.g., 'missing_test')",
+)
+@click.option("--notes", "-n", help="Notes about this session")
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def experiment_start(
+    error_class: str | None,
+    notes: str | None,
+    output_json: bool,
+):
+    """Start a new experiment session.
+    This begins tracking for a learning experiment. Captures the current
+    set of active rules to measure learning over time.
+    Examples:
+        buildlog experiment start
+        buildlog experiment start --error-class missing_test
+        buildlog experiment start --error-class validation_boundary --notes "Testing edge cases"
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    from dataclasses import asdict
+    from buildlog.core import start_session
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    # A ValueError from the core layer is reported on stderr and mapped to
+    # exit status 1 rather than surfacing as a traceback.
+    try:
+        started = start_session(root, error_class=error_class, notes=notes)
+    except ValueError as exc:
+        click.echo(f"Error: {exc}", err=True)
+        raise SystemExit(1)
+    if output_json:
+        click.echo(json_module.dumps(asdict(started), indent=2))
+        return
+    click.echo(f"✓ {started.message}")
+    # Echo the targeted error class back only when one was supplied.
+    if error_class:
+        click.echo(f"  Error class: {error_class}")
+@experiment.command("end")
+@click.option("--entry-file", "-f", help="Corresponding buildlog entry file")
+@click.option("--notes", "-n", help="Additional notes about this session")
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def experiment_end(
+    entry_file: str | None,
+    notes: str | None,
+    output_json: bool,
+):
+    """End the current experiment session.
+    Finalizes the session and calculates metrics including:
+    - Total mistakes logged
+    - Repeated mistakes (from prior sessions)
+    - Rules added during session
+    Examples:
+        buildlog experiment end
+        buildlog experiment end --entry-file 2026-01-21.md
+        buildlog experiment end --notes "Good session, learned 2 new rules"
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    from dataclasses import asdict
+    from buildlog.core import end_session
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    # A ValueError from the core layer is reported on stderr and mapped to
+    # exit status 1 rather than surfacing as a traceback.
+    try:
+        closed = end_session(root, entry_file=entry_file, notes=notes)
+    except ValueError as exc:
+        click.echo(f"Error: {exc}", err=True)
+        raise SystemExit(1)
+    if output_json:
+        click.echo(json_module.dumps(asdict(closed), indent=2))
+        return
+    # Plain-text summary of the finished session.
+    click.echo(f"✓ {closed.message}")
+    click.echo(f"  Duration: {closed.duration_minutes} minutes")
+    mistakes_line = (
+        f"  Mistakes: {closed.mistakes_logged} ({closed.repeated_mistakes} repeats)"
+    )
+    click.echo(mistakes_line)
+    click.echo(f"  Rules: {closed.rules_at_start} → {closed.rules_at_end}")
+@experiment.command("log-mistake")
+@click.option(
+    "--class",
+    "error_class",
+    required=True,
+    help="Error class (e.g., 'missing_test', 'validation_boundary')",
+)
+@click.option(
+    "--description",
+    "-d",
+    required=True,
+    help="Description of the mistake",
+)
+@click.option(
+    "--rule",
+    "-r",
+    "corrected_by_rule",
+    help="Rule ID that should have prevented this",
+)
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def experiment_log_mistake(
+    error_class: str,
+    description: str,
+    corrected_by_rule: str | None,
+    output_json: bool,
+):
+    """Log a mistake during the current session.
+    Records the mistake and checks if it's a repeat of a prior mistake
+    (from earlier sessions). This enables measuring repeated-mistake rates.
+    Examples:
+        buildlog experiment log-mistake --class missing_test -d "Forgot tests"
+        buildlog experiment log-mistake --class validation -d "No max length" -r val-123
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    from dataclasses import asdict
+    from buildlog.core import log_mistake
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    # A ValueError from the core layer is reported on stderr and mapped to
+    # exit status 1 rather than surfacing as a traceback.
+    try:
+        logged = log_mistake(
+            root,
+            error_class=error_class,
+            description=description,
+            corrected_by_rule=corrected_by_rule,
+        )
+    except ValueError as exc:
+        click.echo(f"Error: {exc}", err=True)
+        raise SystemExit(1)
+    if output_json:
+        click.echo(json_module.dumps(asdict(logged), indent=2))
+        return
+    # First-time mistakes get the plain confirmation line.
+    if not logged.was_repeat:
+        click.echo(f"✓ {logged.message}")
+        return
+    # Repeats are flagged and point at the earlier mistake they resemble.
+    click.echo(f"⚠ REPEAT: {logged.message}")
+    click.echo(f"  Similar to: {logged.similar_prior}")
+@experiment.command("metrics")
+@click.option(
+    "--session", "-s", "session_id", help="Specific session ID (or aggregate)"
+)
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def experiment_metrics(session_id: str | None, output_json: bool):
+    """Show metrics for a session or all sessions.
+    Displays mistake rates and rule changes.
+    Examples:
+        buildlog experiment metrics                           # Aggregate metrics
+        buildlog experiment metrics --session session-20260121-140000
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    from dataclasses import asdict
+    from buildlog.core import get_session_metrics
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    # A ValueError from the core layer is reported on stderr and mapped to
+    # exit status 1 rather than surfacing as a traceback.
+    try:
+        stats = get_session_metrics(root, session_id=session_id)
+    except ValueError as exc:
+        click.echo(f"Error: {exc}", err=True)
+        raise SystemExit(1)
+    if output_json:
+        click.echo(json_module.dumps(asdict(stats), indent=2))
+        return
+    # Fixed-width labels keep the values in one column.
+    click.echo(f"Session Metrics: {stats.session_id}")
+    click.echo("=" * 40)
+    click.echo(f"Total mistakes:     {stats.total_mistakes}")
+    click.echo(f"Repeated mistakes:  {stats.repeated_mistakes}")
+    click.echo(f"Repeat rate:        {stats.repeated_mistake_rate:.1%}")
+    click.echo(f"Rules at start:     {stats.rules_at_start}")
+    click.echo(f"Rules at end:       {stats.rules_at_end}")
+    # "+d" always prints the sign, so growth and shrinkage are both explicit.
+    click.echo(f"Rules added:        {stats.rules_added:+d}")
+@experiment.command("report")
+@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
+def experiment_report(output_json: bool):
+    """Generate a comprehensive experiment report.
+    Shows summary statistics, per-session breakdown, and error class analysis.
+    Examples:
+        buildlog experiment report
+        buildlog experiment report --json > report.json
+    """
+    # The docstring doubles as Click's --help text and is kept verbatim.
+    import json as json_module
+    from buildlog.core import get_experiment_report
+    root = Path("buildlog")
+    if not root.exists():
+        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
+        raise SystemExit(1)
+    report = get_experiment_report(root)
+    # The report is dumped directly (no asdict), and indexed by key below.
+    if output_json:
+        click.echo(json_module.dumps(report, indent=2))
+        return
+    # Section 1: overall totals.
+    totals = report["summary"]
+    click.echo("Experiment Report")
+    click.echo("=" * 50)
+    click.echo(f"Total sessions:         {totals['total_sessions']}")
+    click.echo(f"Total mistakes:         {totals['total_mistakes']}")
+    click.echo(f"Repeated mistakes:      {totals['total_repeated']}")
+    click.echo(f"Overall repeat rate:    {totals['overall_repeat_rate']:.1%}")
+    click.echo()
+    # Section 2: one entry per session, skipped when there are none.
+    sessions = report["sessions"]
+    if sessions:
+        click.echo("Per-Session Breakdown")
+        click.echo("-" * 50)
+        for entry in sessions:
+            share = entry["repeated_mistake_rate"]
+            click.echo(f"  {entry['session_id']}")
+            click.echo(
+                f"    Mistakes: {entry['total_mistakes']} "
+                f"({entry['repeated_mistakes']} repeats, {share:.0%})"
+            )
+            click.echo(f"    Rules added: {entry['rules_added']:+d}")
+        click.echo()
+    # Section 3: repeat share per error class, computed here from raw counts.
+    by_class = report["error_classes"]
+    if by_class:
+        click.echo("Error Class Breakdown")
+        click.echo("-" * 50)
+        for name, counts in by_class.items():
+            # Guard against division by zero for classes with no mistakes.
+            if counts["total"] > 0:
+                share = counts["repeated"] / counts["total"]
+            else:
+                share = 0
+            click.echo(
+                f"  {name}: {counts['total']} mistakes "
+                f"({counts['repeated']} repeats, {share:.0%})"
+            )
 if __name__ == "__main__":
     main()

buildlog/core/__init__.py CHANGED Viewed

@@ -2,17 +2,33 @@
 from buildlog.core.operations import (
     DiffResult,
+    EndSessionResult,
     LearnFromReviewResult,
+    LogMistakeResult,
+    LogRewardResult,
+    Mistake,
     PromoteResult,
     RejectResult,
     ReviewIssue,
     ReviewLearning,
+    RewardEvent,
+    RewardSummary,
+    Session,
+    SessionMetrics,
+    StartSessionResult,
     StatusResult,
     diff,
+    end_session,
     find_skills_by_ids,
+    get_experiment_report,
+    get_rewards,
+    get_session_metrics,
     learn_from_review,
+    log_mistake,
+    log_reward,
     promote,
     reject,
+    start_session,
     status,
 )
@@ -24,10 +40,28 @@ __all__ = [
     "ReviewIssue",
     "ReviewLearning",
     "LearnFromReviewResult",
+    "RewardEvent",
+    "LogRewardResult",
+    "RewardSummary",
+    # Session tracking
+    "Session",
+    "Mistake",
+    "SessionMetrics",
+    "StartSessionResult",
+    "EndSessionResult",
+    "LogMistakeResult",
     "status",
     "promote",
     "reject",
     "diff",
     "find_skills_by_ids",
     "learn_from_review",
+    "log_reward",
+    "get_rewards",
+    # Session tracking operations
+    "start_session",
+    "end_session",
+    "log_mistake",
+    "get_session_metrics",
+    "get_experiment_report",
 ]

buildlog 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

buildlog 0.4.0py3-none-any.whl → 0.5.0py3-none-any.whl