buildlog 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
buildlog/cli.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
 
9
9
  import click
10
10
 
11
+ from buildlog.core import get_rewards, log_reward
11
12
  from buildlog.distill import CATEGORIES, distill_all, format_output
12
13
  from buildlog.skills import format_skills, generate_skills
13
14
  from buildlog.stats import calculate_stats, format_dashboard, format_json
@@ -456,5 +457,424 @@ def skills(
456
457
  click.echo(formatted)
457
458
 
458
459
 
460
@main.command()
@click.argument("outcome", type=click.Choice(["accepted", "revision", "rejected"]))
@click.option(
    "--distance",
    "-d",
    # The help text documents a 0-1 domain and the revision reward is
    # described as 1-distance, so reject out-of-range values at parse time
    # instead of forwarding them to log_reward.
    type=click.FloatRange(0.0, 1.0),
    help="Revision distance (0-1, 0=minor tweak, 1=complete redo)",
)
@click.option("--error-class", "-e", help="Category of error (e.g., missing_test)")
@click.option("--notes", "-n", help="Additional notes about the feedback")
@click.option("--rules", "-r", multiple=True, help="Active rule IDs")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def reward(
    outcome: str,
    distance: float | None,
    error_class: str | None,
    notes: str | None,
    rules: tuple[str, ...],
    output_json: bool,
):
    """Log a reward signal for the learning loop.

    Used to provide feedback on agent work for bandit learning.

    OUTCOME is one of:
    - accepted: Work was accepted as-is (reward=1.0)
    - revision: Work needed changes (reward=1-distance)
    - rejected: Work was rejected entirely (reward=0.0)

    Examples:

        buildlog reward accepted
        buildlog reward revision --distance 0.3 --error-class missing_test
        buildlog reward rejected --notes "Completely wrong approach"
        buildlog reward accepted --rules arch-123 --rules wf-456
    """
    # Developer notes (the docstring above is what Click shows for --help):
    #   outcome      one of the three click.Choice values declared above
    #   distance     optional float, constrained to [0.0, 1.0] by the option type
    #   error_class  optional free-form category string
    #   notes        optional free-form text
    #   rules        zero or more rule IDs collected from repeated --rules flags
    #   output_json  emit the result as JSON instead of the text summary
    # Exits with status 1 when ./buildlog is missing or log_reward raises
    # ValueError.
    import json as json_module
    from dataclasses import asdict

    buildlog_dir = Path("buildlog")

    if not buildlog_dir.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    try:
        result = log_reward(
            buildlog_dir,
            outcome=outcome,  # type: ignore[arg-type]
            # An empty tuple (no --rules given) is passed on as None.
            rules_active=list(rules) if rules else None,
            revision_distance=distance,
            error_class=error_class,
            notes=notes,
            source="cli",
        )
    except ValueError as exc:
        # Same reporting style as the experiment sub-commands in this file.
        # NOTE(review): log_reward is not visible here - confirm it signals
        # invalid input with ValueError.
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1) from exc

    if output_json:
        # asdict() is applied to the result, so it is expected to be a dataclass.
        click.echo(json_module.dumps(asdict(result), indent=2))
    else:
        click.echo(f"✓ {result.message}")
        click.echo(f" Reward ID: {result.reward_id}")
        click.echo(f" Total events: {result.total_events}")
521
+
522
+
523
@main.command()
@click.option("--limit", "-n", type=int, help="Limit number of events to show")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def rewards(limit: int | None, output_json: bool):
    """List reward events and summary statistics.

    Shows recent reward events and aggregate statistics useful for
    tracking learning progress.

    Examples:

        buildlog rewards # Show all with summary
        buildlog rewards --limit 10 # Show 10 most recent
        buildlog rewards --json # JSON output for scripts
    """
    # Developer notes: reads the summary via get_rewards(), then prints it
    # either as a JSON document or as a plain-text report. Exits with
    # status 1 when the ./buildlog directory does not exist.
    import json as json_module

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    summary = get_rewards(log_root, limit=limit)

    # Machine-readable path: counters plus each event's to_dict() form.
    if output_json:
        payload = dict(
            total_events=summary.total_events,
            accepted=summary.accepted,
            revisions=summary.revisions,
            rejected=summary.rejected,
            mean_reward=summary.mean_reward,
            events=[item.to_dict() for item in summary.events],
        )
        click.echo(json_module.dumps(payload, indent=2))
        return

    # Human-readable path: aggregate header first.
    click.echo("Reward Signal Summary")
    click.echo("=" * 40)
    click.echo(f"Total events: {summary.total_events}")
    click.echo(f" Accepted: {summary.accepted}")
    click.echo(f" Revisions: {summary.revisions}")
    click.echo(f" Rejected: {summary.rejected}")
    click.echo(f"Mean reward: {summary.mean_reward:.3f}")
    click.echo()

    # Nothing logged yet: show the hint and stop.
    if not summary.events:
        click.echo("No reward events yet.")
        click.echo("Log your first with: buildlog reward accepted")
        return

    click.echo("Recent Events")
    click.echo("-" * 40)
    for item in summary.events:
        stamp = item.timestamp.strftime("%Y-%m-%d %H:%M")
        click.echo(f" [{stamp}] {item.outcome.upper()} (r={item.reward_value:.2f})")
        # Optional detail lines are printed only when the field is truthy.
        if item.error_class:
            click.echo(f" error_class: {item.error_class}")
        if item.notes:
            click.echo(f" notes: {item.notes}")
584
+
585
+
586
+ # -----------------------------------------------------------------------------
587
+ # Experiment Commands (Session Tracking for Issue #21)
588
+ # -----------------------------------------------------------------------------
589
+
590
+
591
@main.group()
def experiment():
    """Commands for running learning experiments.

    Track sessions, log mistakes, and measure repeated-mistake rates
    to evaluate buildlog's effectiveness.

    Example workflow:

        buildlog experiment start --error-class missing_test
        # ... do work, log mistakes as you encounter them ...
        buildlog experiment log-mistake --class missing_test --description "..."
        buildlog experiment end
        buildlog experiment report
    """
    # Pure command group: the sub-commands registered on it do the work, and
    # the docstring alone is a sufficient function body.
607
+
608
+
609
@experiment.command("start")
@click.option(
    "--error-class",
    "-e",
    help="Error class being targeted (e.g., 'missing_test')",
)
@click.option("--notes", "-n", help="Notes about this session")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def experiment_start(
    error_class: str | None,
    notes: str | None,
    output_json: bool,
):
    """Start a new experiment session.

    This begins tracking for a learning experiment. Captures the current
    set of active rules to measure learning over time.

    Examples:

        buildlog experiment start
        buildlog experiment start --error-class missing_test
        buildlog experiment start --error-class validation_boundary --notes "Testing edge cases"
    """
    # Developer notes: delegates to buildlog.core.start_session and prints
    # its result. Exits with status 1 when ./buildlog is missing or when
    # start_session raises ValueError.
    import json as json_module
    from dataclasses import asdict

    from buildlog.core import start_session

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    try:
        outcome = start_session(log_root, error_class=error_class, notes=notes)
    except ValueError as exc:
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    if output_json:
        # The result is serialized through dataclasses.asdict().
        click.echo(json_module.dumps(asdict(outcome), indent=2))
        return

    click.echo(f"✓ {outcome.message}")
    # Echo the targeted class back only when one was supplied.
    if error_class:
        click.echo(f" Error class: {error_class}")
656
+
657
+
658
@experiment.command("end")
@click.option("--entry-file", "-f", help="Corresponding buildlog entry file")
@click.option("--notes", "-n", help="Additional notes about this session")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def experiment_end(
    entry_file: str | None,
    notes: str | None,
    output_json: bool,
):
    """End the current experiment session.

    Finalizes the session and calculates metrics including:
    - Total mistakes logged
    - Repeated mistakes (from prior sessions)
    - Rules added during session

    Examples:

        buildlog experiment end
        buildlog experiment end --entry-file 2026-01-21.md
        buildlog experiment end --notes "Good session, learned 2 new rules"
    """
    # Developer notes: delegates to buildlog.core.end_session and prints its
    # result. Exits with status 1 when ./buildlog is missing or when
    # end_session raises ValueError.
    import json as json_module
    from dataclasses import asdict

    from buildlog.core import end_session

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    try:
        closed = end_session(log_root, entry_file=entry_file, notes=notes)
    except ValueError as exc:
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    if output_json:
        click.echo(json_module.dumps(asdict(closed), indent=2))
        return

    # Text summary: message, duration, mistake counts, rule counts.
    click.echo(f"✓ {closed.message}")
    click.echo(f" Duration: {closed.duration_minutes} minutes")
    mistakes_line = (
        f" Mistakes: {closed.mistakes_logged} ({closed.repeated_mistakes} repeats)"
    )
    click.echo(mistakes_line)
    click.echo(f" Rules: {closed.rules_at_start} → {closed.rules_at_end}")
706
+
707
+
708
@experiment.command("log-mistake")
@click.option(
    "--class",
    "error_class",
    required=True,
    help="Error class (e.g., 'missing_test', 'validation_boundary')",
)
@click.option(
    "--description",
    "-d",
    required=True,
    help="Description of the mistake",
)
@click.option(
    "--rule",
    "-r",
    "corrected_by_rule",
    help="Rule ID that should have prevented this",
)
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def experiment_log_mistake(
    error_class: str,
    description: str,
    corrected_by_rule: str | None,
    output_json: bool,
):
    """Log a mistake during the current session.

    Records the mistake and checks if it's a repeat of a prior mistake
    (from earlier sessions). This enables measuring repeated-mistake rates.

    Examples:

        buildlog experiment log-mistake --class missing_test -d "Forgot tests"
        buildlog experiment log-mistake --class validation -d "No max length" -r val-123
    """
    # Developer notes: delegates to buildlog.core.log_mistake and prints its
    # result. Exits with status 1 when ./buildlog is missing or when
    # log_mistake raises ValueError.
    import json as json_module
    from dataclasses import asdict

    from buildlog.core import log_mistake

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    mistake_fields = {
        "error_class": error_class,
        "description": description,
        "corrected_by_rule": corrected_by_rule,
    }
    try:
        logged = log_mistake(log_root, **mistake_fields)
    except ValueError as exc:
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    if output_json:
        click.echo(json_module.dumps(asdict(logged), indent=2))
        return

    # First-time mistakes get the plain confirmation line.
    if not logged.was_repeat:
        click.echo(f"✓ {logged.message}")
        return

    # Repeats are flagged and the matching prior mistake is shown.
    click.echo(f"⚠ REPEAT: {logged.message}")
    click.echo(f" Similar to: {logged.similar_prior}")
774
+
775
+
776
@experiment.command("metrics")
@click.option(
    "--session", "-s", "session_id", help="Specific session ID (or aggregate)"
)
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def experiment_metrics(session_id: str | None, output_json: bool):
    """Show metrics for a session or all sessions.

    Displays mistake rates and rule changes.

    Examples:

        buildlog experiment metrics # Aggregate metrics
        buildlog experiment metrics --session session-20260121-140000
    """
    # Developer notes: delegates to buildlog.core.get_session_metrics and
    # prints its result. Exits with status 1 when ./buildlog is missing or
    # when get_session_metrics raises ValueError.
    import json as json_module
    from dataclasses import asdict

    from buildlog.core import get_session_metrics

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    try:
        stats = get_session_metrics(log_root, session_id=session_id)
    except ValueError as exc:
        click.echo(f"Error: {exc}", err=True)
        raise SystemExit(1)

    if output_json:
        click.echo(json_module.dumps(asdict(stats), indent=2))
        return

    # Text report: title, rule line, then one metric per line.
    click.echo(f"Session Metrics: {stats.session_id}")
    click.echo("=" * 40)
    click.echo(f"Total mistakes: {stats.total_mistakes}")
    click.echo(f"Repeated mistakes: {stats.repeated_mistakes}")
    click.echo(f"Repeat rate: {stats.repeated_mistake_rate:.1%}")
    click.echo(f"Rules at start: {stats.rules_at_start}")
    click.echo(f"Rules at end: {stats.rules_at_end}")
    # "+d" always prints an explicit sign for the rule delta.
    click.echo(f"Rules added: {stats.rules_added:+d}")
819
+
820
+
821
@experiment.command("report")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def experiment_report(output_json: bool):
    """Generate a comprehensive experiment report.

    Shows summary statistics, per-session breakdown, and error class analysis.

    Examples:

        buildlog experiment report
        buildlog experiment report --json > report.json
    """
    # Developer notes: fetches the report mapping from
    # buildlog.core.get_experiment_report and prints it as JSON or as three
    # text sections (summary, per-session, per-error-class). Exits with
    # status 1 when ./buildlog is missing.
    import json as json_module

    from buildlog.core import get_experiment_report

    log_root = Path("buildlog")
    if not log_root.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    report = get_experiment_report(log_root)

    if output_json:
        # The report is dumped as-is, without conversion.
        click.echo(json_module.dumps(report, indent=2))
        return

    # Section 1: overall totals.
    totals = report["summary"]
    click.echo("Experiment Report")
    click.echo("=" * 50)
    click.echo(f"Total sessions: {totals['total_sessions']}")
    click.echo(f"Total mistakes: {totals['total_mistakes']}")
    click.echo(f"Repeated mistakes: {totals['total_repeated']}")
    click.echo(f"Overall repeat rate: {totals['overall_repeat_rate']:.1%}")
    click.echo()

    # Section 2: one entry per session, skipped when there are none.
    if report["sessions"]:
        click.echo("Per-Session Breakdown")
        click.echo("-" * 50)
        for row in report["sessions"]:
            repeat_share = row["repeated_mistake_rate"]
            click.echo(f" {row['session_id']}")
            click.echo(
                f" Mistakes: {row['total_mistakes']} ({row['repeated_mistakes']} repeats, {repeat_share:.0%})"
            )
            click.echo(f" Rules added: {row['rules_added']:+d}")
        click.echo()

    # Section 3: one line per error class, skipped when there are none.
    if report["error_classes"]:
        click.echo("Error Class Breakdown")
        click.echo("-" * 50)
        for class_name, counts in report["error_classes"].items():
            # Guard the division so an empty class reports a 0 rate.
            if counts["total"] > 0:
                repeat_share = counts["repeated"] / counts["total"]
            else:
                repeat_share = 0
            click.echo(
                f" {class_name}: {counts['total']} mistakes ({counts['repeated']} repeats, {repeat_share:.0%})"
            )
877
+
878
+
459
879
# Run the CLI entry point only when this module is executed as a script.
if __name__ == "__main__":
    main()
buildlog/core/__init__.py CHANGED
@@ -2,17 +2,33 @@
2
2
 
3
3
  from buildlog.core.operations import (
4
4
  DiffResult,
5
+ EndSessionResult,
5
6
  LearnFromReviewResult,
7
+ LogMistakeResult,
8
+ LogRewardResult,
9
+ Mistake,
6
10
  PromoteResult,
7
11
  RejectResult,
8
12
  ReviewIssue,
9
13
  ReviewLearning,
14
+ RewardEvent,
15
+ RewardSummary,
16
+ Session,
17
+ SessionMetrics,
18
+ StartSessionResult,
10
19
  StatusResult,
11
20
  diff,
21
+ end_session,
12
22
  find_skills_by_ids,
23
+ get_experiment_report,
24
+ get_rewards,
25
+ get_session_metrics,
13
26
  learn_from_review,
27
+ log_mistake,
28
+ log_reward,
14
29
  promote,
15
30
  reject,
31
+ start_session,
16
32
  status,
17
33
  )
18
34
 
@@ -24,10 +40,28 @@ __all__ = [
24
40
  "ReviewIssue",
25
41
  "ReviewLearning",
26
42
  "LearnFromReviewResult",
43
+ "RewardEvent",
44
+ "LogRewardResult",
45
+ "RewardSummary",
46
+ # Session tracking
47
+ "Session",
48
+ "Mistake",
49
+ "SessionMetrics",
50
+ "StartSessionResult",
51
+ "EndSessionResult",
52
+ "LogMistakeResult",
27
53
  "status",
28
54
  "promote",
29
55
  "reject",
30
56
  "diff",
31
57
  "find_skills_by_ids",
32
58
  "learn_from_review",
59
+ "log_reward",
60
+ "get_rewards",
61
+ # Session tracking operations
62
+ "start_session",
63
+ "end_session",
64
+ "log_mistake",
65
+ "get_session_metrics",
66
+ "get_experiment_report",
33
67
  ]