buildlog-0.6.1-py3-none-any.whl → buildlog-0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +589 -44
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +12 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +499 -11
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +461 -0
  15. buildlog/mcp/server.py +12 -6
  16. buildlog/mcp/tools.py +166 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +74 -26
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/tracking.py +20 -1
  23. buildlog/render/windsurf.py +95 -0
  24. buildlog/seeds.py +41 -0
  25. buildlog/skills.py +69 -6
  26. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/copier.yml +0 -4
  27. buildlog-0.8.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  28. buildlog-0.8.0.dist-info/METADATA +151 -0
  29. buildlog-0.8.0.dist-info/RECORD +54 -0
  30. buildlog-0.6.1.dist-info/METADATA +0 -490
  31. buildlog-0.6.1.dist-info/RECORD +0 -41
  32. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/post_gen.py +0 -0
  33. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  34. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  35. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  36. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  37. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  38. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/WHEEL +0 -0
  39. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/entry_points.txt +0 -0
  40. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/licenses/LICENSE +0 -0
buildlog/core/operations.py

@@ -14,6 +14,7 @@ from pathlib import Path
  from typing import Literal, TypedDict

  from buildlog.confidence import ConfidenceMetrics, merge_confidence_metrics
+ from buildlog.core.bandit import ThompsonSamplingBandit
  from buildlog.render import get_renderer
  from buildlog.skills import Skill, SkillSet, generate_skills

@@ -35,6 +36,9 @@ __all__ = [
  "StartSessionResult",
  "EndSessionResult",
  "LogMistakeResult",
+ # Gauntlet loop
+ "GauntletLoopResult",
+ "GauntletAcceptRiskResult",
  "status",
  "promote",
  "reject",
@@ -49,6 +53,10 @@ __all__ = [
  "log_mistake",
  "get_session_metrics",
  "get_experiment_report",
+ "get_bandit_status",
+ # Gauntlet loop operations
+ "gauntlet_process_issues",
+ "gauntlet_accept_risk",
  ]


@@ -552,7 +560,7 @@ def status(
  def promote(
  buildlog_dir: Path,
  skill_ids: list[str],
- target: Literal["claude_md", "settings_json", "skill"] = "claude_md",
+ target: str = "claude_md",
  target_path: Path | None = None,
  ) -> PromoteResult:
  """Promote skills to agent rules.
@@ -560,7 +568,8 @@ def promote(
  Args:
  buildlog_dir: Path to buildlog directory.
  skill_ids: List of skill IDs to promote.
- target: Where to write rules ("claude_md", "settings_json", or "skill").
+ target: Where to write rules. One of: claude_md, settings_json,
+ skill, cursor, copilot, windsurf, continue_dev.
  target_path: Optional custom path for the target file.

  Returns:
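The widened target parameter lines up with the new renderer modules in the file list (cursor.py, copilot.py, windsurf.py, continue_dev.py). A minimal usage sketch, assuming only the promote() signature documented in this hunk and that the operation is importable from buildlog.core.operations; the skill IDs are placeholders:

from pathlib import Path

from buildlog.core.operations import promote

# Hypothetical skill IDs; real ones come from the generated skill set.
result = promote(
    buildlog_dir=Path("buildlog"),
    skill_ids=["testing-001", "security-002"],
    target="cursor",  # or claude_md, settings_json, skill, copilot, windsurf, continue_dev
)
print(result)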
@@ -932,14 +941,27 @@ def log_reward(
  ) -> LogRewardResult:
  """Log a reward event for bandit learning.

- Appends to reward_events.jsonl for later analysis.
+ This is where the bandit learns from EXPLICIT feedback:
+
+ The reward signal comes from the outcome:
+ - accepted (reward=1.0): Rules helped produce good output
+ - rejected (reward=0.0): Rules failed to prevent bad output
+ - revision (reward=1-distance): Partial credit based on correction needed
+
+ Unlike log_mistake() which gives implicit negative feedback, this allows
+ direct positive feedback when rules DO help. This is crucial for learning
+ which rules are genuinely effective, not just which ones don't fail.
+
+ Appends to reward_events.jsonl for analysis AND updates the bandit.

  Args:
  buildlog_dir: Path to buildlog directory.
  outcome: Type of feedback (accepted/revision/rejected).
  rules_active: List of rule IDs that were in context.
+ If None, tries to use session's selected_rules.
  revision_distance: How much correction was needed (0-1, for revisions).
  error_class: Category of error if applicable.
+ If None, tries to use session's error_class.
  notes: Optional notes about the feedback.
  source: Where this feedback came from.

@@ -950,6 +972,15 @@ def log_reward(
  reward_id = _generate_reward_id(outcome, now)
  reward_value = _compute_reward_value(outcome, revision_distance)

+ # Try to get rules and context from active session if not provided
+ active_path = _get_active_session_path(buildlog_dir)
+ if active_path.exists():
+ session_data = json.loads(active_path.read_text())
+ if rules_active is None:
+ rules_active = session_data.get("selected_rules", [])
+ if error_class is None:
+ error_class = session_data.get("error_class")
+
  event = RewardEvent(
  id=reward_id,
  timestamp=now,
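_compute_reward_value itself is outside this diff, but the docstring above pins down the mapping it must implement. A sketch of that mapping as described, not the package's actual code (the fallback distance of 0.5 is an assumption):

def compute_reward(outcome: str, revision_distance: float | None = None) -> float:
    """Map an outcome to a scalar reward per the log_reward docstring."""
    if outcome == "accepted":
        return 1.0  # rules helped produce good output
    if outcome == "rejected":
        return 0.0  # rules failed to prevent bad output
    # revision: partial credit of 1 - distance, clamped to [0, 1]
    distance = 0.5 if revision_distance is None else revision_distance
    return max(0.0, min(1.0, 1.0 - distance))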
@@ -969,6 +1000,32 @@ def log_reward(
  with open(rewards_path, "a") as f:
  f.write(json.dumps(event.to_dict()) + "\n")

+ # =========================================================================
+ # BANDIT LEARNING: Update with explicit reward
+ # =========================================================================
+ #
+ # For accepted (reward=1): Beta(α, β) → Beta(α + 1, β)
+ # → Distribution shifts RIGHT, increasing expected value
+ # → Rule becomes MORE likely to be selected
+ #
+ # For rejected (reward=0): Beta(α, β) → Beta(α, β + 1)
+ # → Distribution shifts LEFT, decreasing expected value
+ # → Rule becomes LESS likely to be selected
+ #
+ # For revision (0 < reward < 1): Both α and β increase proportionally
+ # → Distribution narrows (more confident) with moderate expected value
+ # =========================================================================
+
+ if rules_active:
+ bandit_path = buildlog_dir / "bandit_state.jsonl"
+ bandit = ThompsonSamplingBandit(bandit_path)
+
+ bandit.batch_update(
+ rule_ids=rules_active,
+ reward=reward_value,
+ context=error_class or "general",
+ )
+
  # Count total events
  total_events = 0
  if rewards_path.exists():
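ThompsonSamplingBandit lives in the new buildlog/core/bandit.py, which this section does not show. For reference, the Beta-Bernoulli update described in the comment block above looks like this when sketched with only the standard library; a per-arm illustration, not the package's implementation:

import random


class BetaArm:
    """Posterior Beta(alpha, beta) for one (context, rule) pair."""

    def __init__(self, alpha: float = 1.0, beta: float = 1.0) -> None:
        self.alpha = alpha
        self.beta = beta

    def update(self, reward: float) -> None:
        # accepted (1.0) bumps alpha, rejected (0.0) bumps beta,
        # revision (fractional reward) spreads the observation across both
        self.alpha += reward
        self.beta += 1.0 - reward

    def sample(self) -> float:
        # Thompson Sampling draws from the posterior rather than using its mean
        return random.betavariate(self.alpha, self.beta)

    @property
    def mean(self) -> float:
        return self.alpha / (self.alpha + self.beta)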
@@ -976,11 +1033,16 @@ def log_reward(
  1 for line in rewards_path.read_text().strip().split("\n") if line
  )

+ rules_count = len(rules_active) if rules_active else 0
+ message = f"Logged {outcome} (reward={reward_value:.2f})"
+ if rules_count > 0:
+ message += f" | Updated bandit: {rules_count} rules"
+
  return LogRewardResult(
  reward_id=reward_id,
  reward_value=reward_value,
  total_events=total_events,
- message=f"Logged {outcome} (reward={reward_value:.2f})",
+ message=message,
  )


@@ -1055,6 +1117,7 @@ class SessionDict(TypedDict, total=False):
  entry_file: str | None
  rules_at_start: list[str]
  rules_at_end: list[str]
+ selected_rules: list[str]  # Bandit-selected subset for this session
  error_class: str | None
  notes: str | None

@@ -1064,15 +1127,17 @@ class Session:
  """A coding session for experiment tracking.

  Tracks the state of rules before and after a session to measure
- learning effectiveness.
+ learning effectiveness. The bandit selects a subset of rules
+ (selected_rules) to be "active" for this session based on context.

  Attributes:
  id: Unique identifier for this session.
  started_at: When the session started.
  ended_at: When the session ended (None if still active).
  entry_file: Corresponding buildlog entry file, if any.
- rules_at_start: Rule IDs active at session start.
- rules_at_end: Rule IDs active at session end.
+ rules_at_start: All rule IDs available at session start.
+ rules_at_end: All rule IDs available at session end.
+ selected_rules: Bandit-selected subset active for this session.
  error_class: Error class being targeted (e.g., "missing_test").
  notes: Optional notes about the session.
  """
@@ -1083,6 +1148,7 @@ class Session:
  entry_file: str | None = None
  rules_at_start: list[str] = field(default_factory=list)
  rules_at_end: list[str] = field(default_factory=list)
+ selected_rules: list[str] = field(default_factory=list)
  error_class: str | None = None
  notes: str | None = None

@@ -1095,6 +1161,8 @@ class Session:
  "rules_at_start": self.rules_at_start,
  "rules_at_end": self.rules_at_end,
  }
+ if self.selected_rules:
+ result["selected_rules"] = self.selected_rules
  if self.entry_file is not None:
  result["entry_file"] = self.entry_file
  if self.error_class is not None:
@@ -1124,6 +1192,7 @@ class Session:
  entry_file=data.get("entry_file"),
  rules_at_start=data.get("rules_at_start", []),
  rules_at_end=data.get("rules_at_end", []),
+ selected_rules=data.get("selected_rules", []),
  error_class=data.get("error_class"),
  notes=data.get("notes"),
  )
@@ -1227,11 +1296,15 @@ class SessionMetrics:

  @dataclass
  class StartSessionResult:
- """Result of starting a new session."""
+ """Result of starting a new session.
+
+ Includes both the full rule set and the bandit-selected subset.
+ """

  session_id: str
  error_class: str | None
  rules_count: int
+ selected_rules: list[str]  # Bandit-selected rules for this session
  message: str


@@ -1310,6 +1383,31 @@ def _get_current_rules(buildlog_dir: Path) -> list[str]:
  return list(_load_json_set(promoted_path, "skill_ids"))


+ def _get_seed_rule_ids(buildlog_dir: Path) -> set[str]:
+ """Get IDs of rules that come from seed personas.
+
+ Seed rules (from gauntlet personas like Test Terrorist, Security Karen)
+ have non-empty persona_tags. These rules get boosted priors in the
+ bandit because they represent curated, expert knowledge.
+
+ Returns:
+ Set of rule IDs that have persona_tags.
+ """
+ try:
+ skill_set = generate_skills(buildlog_dir)
+ seed_ids: set[str] = set()
+
+ for category_skills in skill_set.skills.values():
+ for skill in category_skills:
+ if skill.persona_tags:  # Non-empty means it's from a seed
+ seed_ids.add(skill.id)
+
+ return seed_ids
+ except Exception:
+ # If skill generation fails, treat no rules as seeds
+ return set()
+
+
  def _load_sessions(buildlog_dir: Path) -> list[Session]:
  """Load all sessions from JSONL file."""
  sessions_path = _get_sessions_path(buildlog_dir)
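The "boosted priors" mentioned here are defined in bandit.py, not in this file. One common way to encode trust in curated rules is an optimistic starting prior for seed arms; a sketch of that idea with illustrative numbers, not taken from the package:

def initial_prior(rule_id: str, seed_rule_ids: set[str]) -> tuple[float, float]:
    """Return a starting (alpha, beta) for a new arm; seed rules start optimistic."""
    if rule_id in seed_rule_ids:
        return 3.0, 1.0  # prior mean 0.75: trust curated persona rules up front
    return 1.0, 1.0  # uniform prior for organically learned rules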
@@ -1383,25 +1481,78 @@ def start_session(
  buildlog_dir: Path,
  error_class: str | None = None,
  notes: str | None = None,
+ select_k: int = 3,
  ) -> StartSessionResult:
- """Start a new experiment session.
+ """Start a new experiment session with bandit-selected rules.
+
+ This is where Thompson Sampling kicks in:
+
+ 1. Load all available rules (candidates)
+ 2. Identify which rules are from seeds (get boosted priors)
+ 3. Use bandit to select top-k rules for this error_class context
+ 4. Store selected rules in session for later attribution
+
+ The selected rules are the ones "active" for this session. When a
+ mistake occurs, we'll give negative feedback to these rules (they
+ didn't prevent the mistake). This teaches the bandit which rules
+ are effective for which error classes.

  Args:
  buildlog_dir: Path to buildlog directory.
  error_class: Error class being targeted (e.g., "missing_test").
+ This is the CONTEXT for contextual bandits - rules
+ are evaluated per-context.
  notes: Optional notes about the session.
+ select_k: Number of rules to select via Thompson Sampling.
+ Default 3 balances coverage with attribution clarity.

  Returns:
- StartSessionResult with session ID and current rules count.
+ StartSessionResult with session ID, rules count, and selected rules.
  """
  now = datetime.now(timezone.utc)
  session_id = _generate_session_id(now)
  current_rules = _get_current_rules(buildlog_dir)

+ # =========================================================================
+ # THOMPSON SAMPLING: Select rules for this session
+ # =========================================================================
+ #
+ # The bandit maintains a Beta distribution for each (context, rule) pair.
+ # At session start, we SAMPLE from each distribution and pick the top-k.
+ #
+ # Why sample instead of using the mean?
+ # - Arms we're uncertain about have high variance
+ # - High variance means occasional high samples
+ # - This causes us to explore uncertain arms
+ # - As we gather data, variance shrinks, and we exploit
+ #
+ # This is the elegant explore-exploit balance of Thompson Sampling.
+ # =========================================================================
+
+ selected_rules: list[str] = []
+
+ if current_rules:
+ # Initialize bandit
+ bandit_path = buildlog_dir / "bandit_state.jsonl"
+ bandit = ThompsonSamplingBandit(bandit_path)
+
+ # Identify seed rules (those with persona_tags from gauntlet)
+ # Seeds get boosted priors - we believe curated rules are good
+ seed_rule_ids = _get_seed_rule_ids(buildlog_dir)
+
+ # SELECT: Sample from Beta distributions, pick top-k
+ selected_rules = bandit.select(
+ candidates=current_rules,
+ context=error_class or "general",
+ k=min(select_k, len(current_rules)),
+ seed_rule_ids=seed_rule_ids,
+ )
+
  session = Session(
  id=session_id,
  started_at=now,
  rules_at_start=current_rules,
+ selected_rules=selected_rules,
  error_class=error_class,
  notes=notes,
  )
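bandit.select() is also defined in bandit.py. Read literally, the comment block above amounts to sampling each candidate's posterior once and keeping the k highest draws; a sketch building on the BetaArm idea from earlier, not the package's code:

import random


def select_top_k(
    arms: dict[str, tuple[float, float]],  # rule_id -> (alpha, beta) for one context
    candidates: list[str],
    k: int,
) -> list[str]:
    """Sample each candidate's Beta posterior once and keep the k highest draws."""
    draws = {
        rule_id: random.betavariate(*arms.get(rule_id, (1.0, 1.0)))
        for rule_id in candidates
    }
    return sorted(draws, key=draws.get, reverse=True)[:k]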
@@ -1415,7 +1566,11 @@ def start_session(
  session_id=session_id,
  error_class=error_class,
  rules_count=len(current_rules),
- message=f"Started session {session_id} with {len(current_rules)} active rules",
+ selected_rules=selected_rules,
+ message=(
+ f"Started session {session_id}: selected {len(selected_rules)}/"
+ f"{len(current_rules)} rules via Thompson Sampling"
+ ),
  )


@@ -1487,6 +1642,16 @@ def log_mistake(
  ) -> LogMistakeResult:
  """Log a mistake during an experiment session.

+ This is where the bandit learns from NEGATIVE feedback:
+
+ When a mistake occurs, the selected rules for this session FAILED
+ to prevent it. We update the bandit with reward=0 for each selected
+ rule, teaching it that these rules aren't effective for this context.
+
+ Over time, rules that consistently fail to prevent mistakes will
+ have their Beta distributions shift left (lower expected value),
+ and the bandit will stop selecting them.
+
  Args:
  buildlog_dir: Path to buildlog directory.
  error_class: Category of error (e.g., "missing_test").
@@ -1533,9 +1698,39 @@ def log_mistake(
  with open(mistakes_path, "a") as f:
  f.write(json.dumps(mistake.to_dict()) + "\n")

+ # =========================================================================
+ # BANDIT LEARNING: Negative feedback for selected rules
+ # =========================================================================
+ #
+ # The selected rules were supposed to help prevent mistakes. A mistake
+ # occurred anyway, so we give them reward=0 (failure).
+ #
+ # Bayesian update: Beta(α, β) → Beta(α + 0, β + 1) = Beta(α, β + 1)
+ #
+ # This shifts the distribution LEFT, decreasing the expected value.
+ # Rules that repeatedly fail will become less likely to be selected.
+ # =========================================================================
+
+ selected_rules = session_data.get("selected_rules", [])
+ if selected_rules:
+ bandit_path = buildlog_dir / "bandit_state.jsonl"
+ bandit = ThompsonSamplingBandit(bandit_path)
+
+ # Use session's error_class as context, not the mistake's
+ # (they should match, but session context is authoritative)
+ context = session_data.get("error_class") or "general"
+
+ bandit.batch_update(
+ rule_ids=selected_rules,
+ reward=0.0,  # Failure: rules didn't prevent mistake
+ context=context,
+ )
+
  message = f"Logged mistake: {error_class}"
  if similar:
  message += f" (REPEAT of {similar.id})"
+ if selected_rules:
+ message += f" | Updated bandit: {len(selected_rules)} rules got reward=0"

  return LogMistakeResult(
  mistake_id=mistake_id,
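Taken together, start_session, log_mistake, and log_reward form the feedback loop the comments describe. A usage sketch that sticks to the parameters and result fields visible in this diff, assuming the operations are importable from buildlog.core.operations; log_mistake's full argument list is truncated above, so only error_class is passed:

from pathlib import Path

from buildlog.core.operations import log_mistake, log_reward, start_session

buildlog_dir = Path("buildlog")

# 1. Start a session: Thompson Sampling picks a few rules for this context.
started = start_session(buildlog_dir, error_class="missing_test")
print(started.message, started.selected_rules)

# 2a. A mistake slipped through: the selected rules get implicit reward=0.
log_mistake(buildlog_dir, error_class="missing_test")

# 2b. Or the output was accepted: the selected rules get explicit reward=1.
log_reward(buildlog_dir, outcome="accepted")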
@@ -1652,3 +1847,296 @@ def get_experiment_report(buildlog_dir: Path) -> dict:
  "sessions": session_metrics,
  "error_classes": error_classes,
  }
+
+
+ def get_bandit_status(
+ buildlog_dir: Path,
+ context: str | None = None,
+ top_k: int = 10,
+ ) -> dict:
+ """Get current bandit state and statistics.
+
+ Provides insight into the Thompson Sampling bandit's learned beliefs.
+ Useful for debugging and understanding which rules are being favored.
+
+ Args:
+ buildlog_dir: Path to buildlog directory.
+ context: Specific error class to show. If None, shows all contexts.
+ top_k: Number of top rules to show per context.
+
+ Returns:
+ Dictionary with:
+ - summary: Overall bandit statistics
+ - contexts: Per-context rule rankings
+ - top_rules: Top rules by expected value per context
+ """
+ bandit_path = buildlog_dir / "bandit_state.jsonl"
+ bandit = ThompsonSamplingBandit(bandit_path)
+
+ stats = bandit.get_stats(context)
+
+ # Group stats by context
+ contexts: dict[str, list[dict]] = {}
+ for key, rule_stats in stats.items():
+ ctx = rule_stats["context"]
+ if ctx not in contexts:
+ contexts[ctx] = []
+ contexts[ctx].append(
+ {
+ "rule_id": key.split(":")[-1] if ":" in key else key,
+ **{k: v for k, v in rule_stats.items() if k != "context"},
+ }
+ )
+
+ # Sort by mean (descending) and take top_k
+ top_rules: dict[str, list[dict]] = {}
+ for ctx, rules in contexts.items():
+ sorted_rules = sorted(rules, key=lambda x: x["mean"], reverse=True)
+ top_rules[ctx] = sorted_rules[:top_k]
+
+ # Summary stats
+ total_arms = sum(len(rules) for rules in contexts.values())
+ total_observations = sum(
+ rule.get("total_observations", 0)
+ for rules in contexts.values()
+ for rule in rules
+ )
+
+ return {
+ "summary": {
+ "total_contexts": len(contexts),
+ "total_arms": total_arms,
+ "total_observations": total_observations,
+ "state_file": str(bandit_path),
+ },
+ "top_rules": top_rules,
+ "all_rules": contexts if context else None,  # Only include all if filtering
+ }
+
+
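A quick way to inspect what the bandit has learned, based only on the dictionary keys assembled in the return statement above (per-rule fields such as "mean" come from bandit.get_stats(), whose exact schema lives in bandit.py):

from pathlib import Path

from buildlog.core.operations import get_bandit_status

status = get_bandit_status(Path("buildlog"), context="missing_test", top_k=5)

print(status["summary"])  # totals plus the bandit_state.jsonl path
for ctx, rules in status["top_rules"].items():
    for rule in rules:
        print(ctx, rule["rule_id"], round(rule["mean"], 3))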
+ # =============================================================================
+ # Gauntlet Loop Operations
+ # =============================================================================
+
+
+ @dataclass
+ class GauntletLoopResult:
+ """Result of processing gauntlet issues.
+
+ Attributes:
+ action: What to do next:
+ - "fix_criticals": Criticals remain, auto-fix and loop
+ - "checkpoint_majors": No criticals, but majors remain (HITL)
+ - "checkpoint_minors": Only minors remain (HITL)
+ - "clean": No issues remain
+ criticals: List of critical severity issues
+ majors: List of major severity issues
+ minors: List of minor/nitpick severity issues
+ iteration: Current iteration number
+ learnings_persisted: Number of learnings persisted this iteration
+ message: Human-readable summary
+ """
+
+ action: Literal["fix_criticals", "checkpoint_majors", "checkpoint_minors", "clean"]
+ criticals: list[dict]
+ majors: list[dict]
+ minors: list[dict]
+ iteration: int
+ learnings_persisted: int
+ message: str
+
+
+ @dataclass
+ class GauntletAcceptRiskResult:
+ """Result of accepting risk with remaining issues.
+
+ Attributes:
+ accepted_issues: Number of issues accepted as risk
+ github_issues_created: Number of GitHub issues created (if enabled)
+ github_issue_urls: URLs of created GitHub issues
+ message: Human-readable summary
+ error: Error message if operation failed
+ """
+
+ accepted_issues: int
+ github_issues_created: int
+ github_issue_urls: list[str]
+ message: str
+ error: str | None = None
+
+
+ def gauntlet_process_issues(
+ buildlog_dir: Path,
+ issues: list[dict],
+ iteration: int = 1,
+ source: str | None = None,
+ ) -> GauntletLoopResult:
+ """Process gauntlet issues and determine next action.
+
+ Categorizes issues by severity, persists learnings, and returns
+ the appropriate next action for the gauntlet loop.
+
+ Args:
+ buildlog_dir: Path to buildlog directory.
+ issues: List of issues from the gauntlet review.
+ iteration: Current iteration number (for tracking).
+ source: Optional source identifier for learnings.
+
+ Returns:
+ GauntletLoopResult with categorized issues and next action.
+ """
+ # Categorize by severity
+ criticals = [i for i in issues if i.get("severity") == "critical"]
+ majors = [i for i in issues if i.get("severity") == "major"]
+ minors = [i for i in issues if i.get("severity") in ("minor", "nitpick", None)]
+
+ # Persist learnings for this iteration
+ learn_source = source or f"gauntlet:iteration-{iteration}"
+ learn_result = learn_from_review(buildlog_dir, issues, learn_source)
+ learnings_persisted = len(learn_result.new_learnings) + len(
+ learn_result.reinforced_learnings
+ )
+
+ # Determine action
+ if criticals:
+ action: Literal[
+ "fix_criticals", "checkpoint_majors", "checkpoint_minors", "clean"
+ ] = "fix_criticals"
+ message = (
+ f"Iteration {iteration}: {len(criticals)} critical, "
+ f"{len(majors)} major, {len(minors)} minor. "
+ f"Fix criticals (and majors) then re-run."
+ )
+ elif majors:
+ action = "checkpoint_majors"
+ message = (
+ f"Iteration {iteration}: No criticals! "
+ f"{len(majors)} major, {len(minors)} minor remain. "
+ f"Continue clearing majors?"
+ )
+ elif minors:
+ action = "checkpoint_minors"
+ message = (
+ f"Iteration {iteration}: Only {len(minors)} minor issues remain. "
+ f"Accept risk or continue?"
+ )
+ else:
+ action = "clean"
+ message = f"Iteration {iteration}: All clear! No issues found."
+
+ return GauntletLoopResult(
+ action=action,
+ criticals=criticals,
+ majors=majors,
+ minors=minors,
+ iteration=iteration,
+ learnings_persisted=learnings_persisted,
+ message=message,
+ )
+
+
+ def gauntlet_accept_risk(
+ remaining_issues: list[dict],
+ create_github_issues: bool = False,
+ repo: str | None = None,
+ ) -> GauntletAcceptRiskResult:
+ """Accept risk for remaining issues, optionally creating GitHub issues.
+
+ Args:
+ remaining_issues: Issues being accepted as risk.
+ create_github_issues: Whether to create GitHub issues for tracking.
+ repo: Repository for GitHub issues (uses current repo if None).
+
+ Returns:
+ GauntletAcceptRiskResult with created issue info.
+ """
+ import subprocess
+
+ github_urls: list[str] = []
+ error: str | None = None
+
+ if create_github_issues and remaining_issues:
+ for issue in remaining_issues:
+ severity = issue.get("severity", "minor")
+ rule = issue.get("rule_learned", issue.get("description", "Unknown"))
+ description = issue.get("description", "")
+ location = issue.get("location", "")
+
+ # Sanitize inputs for GitHub issue creation
+ # Note: We use list args (not shell=True), so this is defense-in-depth
+ def _sanitize_for_gh(text: str, max_len: int = 256) -> str:
+ """Sanitize text for GitHub issue fields."""
+ # Remove/replace problematic characters
+ sanitized = text.replace("\n", " ").replace("\r", " ")
+ # Truncate to max length
+ if len(sanitized) > max_len:
+ sanitized = sanitized[: max_len - 3] + "..."
+ return sanitized.strip()
+
+ safe_severity = _sanitize_for_gh(str(severity), 20)
+ safe_rule = _sanitize_for_gh(str(rule), 200)
+ safe_description = _sanitize_for_gh(str(description), 1000)
+ safe_location = _sanitize_for_gh(str(location), 100)
+
+ # Build issue body
+ body_parts = [
+ f"**Severity:** {safe_severity}",
+ f"**Rule:** {safe_rule}",
+ "",
+ "## Description",
+ safe_description,
+ ]
+ if safe_location:
+ body_parts.extend(["", f"**Location:** `{safe_location}`"])
+
+ body_parts.extend(
+ [
+ "",
+ "---",
+ "_Created by buildlog gauntlet loop (accepted risk)_",
+ ]
+ )
+
+ body = "\n".join(body_parts)
+ title = f"[Gauntlet/{safe_severity}] {safe_rule[:60]}"
+
+ # Create GitHub issue
+ cmd = [
+ "gh",
+ "issue",
+ "create",
+ "--title",
+ title,
+ "--body",
+ body,
+ "--label",
+ severity,
+ ]
+ if repo:
+ cmd.extend(["--repo", repo])
+
+ try:
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+ # gh issue create outputs the URL
+ url = result.stdout.strip()
+ if url:
+ github_urls.append(url)
+ except subprocess.CalledProcessError as e:
+ # Don't fail entirely, just note the error
+ error = f"Failed to create some GitHub issues: {e.stderr}"
+ except FileNotFoundError:
+ error = "gh CLI not found. Install GitHub CLI to create issues."
+ break
+
+ return GauntletAcceptRiskResult(
+ accepted_issues=len(remaining_issues),
+ github_issues_created=len(github_urls),
+ github_issue_urls=github_urls,
+ message=(
+ f"Accepted {len(remaining_issues)} issues as risk. "
+ f"Created {len(github_urls)} GitHub issues."
+ if create_github_issues
+ else f"Accepted {len(remaining_issues)} issues as risk."
+ ),
+ error=error,
+ )
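The loop these two helpers support is: review, triage, fix or checkpoint, repeat. A driver sketch that uses only the signatures and action values defined above; run_gauntlet_review is a placeholder stub standing in for whatever produces the issue dicts:

from pathlib import Path

from buildlog.core.operations import gauntlet_accept_risk, gauntlet_process_issues


def run_gauntlet_review() -> list[dict]:
    """Placeholder: in practice the persona gauntlet produces these issue dicts."""
    return [{"severity": "major", "description": "example issue", "rule_learned": "Add tests"}]


buildlog_dir = Path("buildlog")
iteration = 1

while True:
    issues = run_gauntlet_review()
    result = gauntlet_process_issues(buildlog_dir, issues, iteration=iteration)
    print(result.message)

    if result.action == "fix_criticals":
        iteration += 1  # fix the criticals, then re-run the review
        continue
    if result.action in ("checkpoint_majors", "checkpoint_minors"):
        # Human-in-the-loop checkpoint: stop here or accept the remaining risk.
        gauntlet_accept_risk(result.majors + result.minors, create_github_issues=True)
    break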