buildlog 0.6.0__tar.gz → 0.7.0__tar.gz

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (41)
  1. {buildlog-0.6.0 → buildlog-0.7.0}/.gitignore +8 -0
  2. {buildlog-0.6.0 → buildlog-0.7.0}/PKG-INFO +71 -17
  3. {buildlog-0.6.0 → buildlog-0.7.0}/README.md +70 -16
  4. {buildlog-0.6.0 → buildlog-0.7.0}/pyproject.toml +1 -1
  5. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/cli.py +185 -20
  6. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/core/__init__.py +10 -0
  7. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/core/operations.py +234 -0
  8. buildlog-0.7.0/src/buildlog/data/seeds/security_karen.yaml +162 -0
  9. buildlog-0.7.0/src/buildlog/data/seeds/test_terrorist.yaml +280 -0
  10. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/server.py +6 -0
  11. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/tools.py +105 -0
  12. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/claude_md.py +17 -4
  13. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/tracking.py +20 -1
  14. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seeds.py +91 -0
  15. buildlog-0.7.0/template/buildlog/assets/.gitkeep +0 -0
  16. {buildlog-0.6.0 → buildlog-0.7.0}/LICENSE +0 -0
  17. {buildlog-0.6.0 → buildlog-0.7.0}/copier.yml +0 -0
  18. {buildlog-0.6.0 → buildlog-0.7.0}/post_gen.py +0 -0
  19. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/__init__.py +0 -0
  20. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/confidence.py +0 -0
  21. /buildlog-0.6.0/template/buildlog/.gitkeep → /buildlog-0.7.0/src/buildlog/data/__init__.py +0 -0
  22. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/distill.py +0 -0
  23. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/embeddings.py +0 -0
  24. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/__init__.py +0 -0
  25. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/__init__.py +0 -0
  26. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/base.py +0 -0
  27. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/settings_json.py +0 -0
  28. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/skill.py +0 -0
  29. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/__init__.py +0 -0
  30. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/categorizers.py +0 -0
  31. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/extractors.py +0 -0
  32. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/generators.py +0 -0
  33. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/models.py +0 -0
  34. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/pipeline.py +0 -0
  35. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/sources.py +0 -0
  36. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/skills.py +0 -0
  37. {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/stats.py +0 -0
  38. {buildlog-0.6.0/template/buildlog/assets → buildlog-0.7.0/template/buildlog}/.gitkeep +0 -0
  39. {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/2026-01-01-example.md +0 -0
  40. {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  41. {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/_TEMPLATE.md +0 -0
@@ -44,3 +44,11 @@ htmlcov/
44
44
 
45
45
  # Build artifacts
46
46
  *.whl
47
+
48
+ # Development artifacts
49
+ CHAT.txt
50
+ results/
51
+ sketches/
52
+
53
+ # buildlog runtime data (in project root, not in src/)
54
+ buildlog/.buildlog/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: buildlog
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Engineering notebook for AI-assisted development
5
5
  Project-URL: Homepage, https://github.com/Peleke/buildlog-template
6
6
  Project-URL: Repository, https://github.com/Peleke/buildlog-template
@@ -123,11 +123,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
123
123
 
124
124
  ## The Mechanism
125
125
 
126
- buildlog uses **contextual bandits** to select which rules to surface.
126
+ buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
127
+
128
+ ### What Exists Today (v0.7)
127
129
 
128
130
  ```
129
131
  ┌─────────────────────────────────────────────────────────────────┐
130
- CONTEXTUAL BANDIT SETUP
132
+ CURRENT INFRASTRUCTURE
133
+ ├─────────────────────────────────────────────────────────────────┤
134
+ │ │
135
+ │ ✅ Rule extraction From entries, reviews, curated seeds │
136
+ │ ✅ Confidence scoring Frequency + recency based │
137
+ │ ✅ Reward logging Accept/reject/revision signals │
138
+ │ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
139
+ │ ✅ Review gauntlet Curated persona-based code review │
140
+ │ ⏳ Manual promotion Human selects rules to surface │
141
+ │ │
142
+ └─────────────────────────────────────────────────────────────────┘
143
+ ```
144
+
145
+ ### What's Coming (v0.8+)
146
+
147
+ ```
148
+ ┌─────────────────────────────────────────────────────────────────┐
149
+ │ CONTEXTUAL BANDIT (PLANNED) │
131
150
  ├─────────────────────────────────────────────────────────────────┤
132
151
  │ │
133
152
  │ Context (c): Error class, file type, task category │
@@ -147,9 +166,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
147
166
 
148
167
  **Reward** = did surfacing this rule actually help?
149
168
 
150
- The system explores (tries uncertain rules) and exploits (uses proven rules) based on accumulated evidence. Thompson Sampling provides theoretical guarantees: O(√(KT log K)) regret bounds.
169
+ The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
151
170
 
152
- This isn't magic. It's a well-understood framework with decades of research. We're applying it to agent rule selection.
171
+ We're building in public—the bandit implementation will be developed with full documentation of the process.
153
172
 
154
173
  ---
155
174
 
@@ -161,16 +180,20 @@ buildlog captures signal at every stage:
161
180
  flowchart LR
162
181
  A["Work Sessions"] --> B["Structured Entries"]
163
182
  B --> C["Extracted Rules"]
164
- C --> D["Bandit Selection"]
183
+ C --> D["Manual Promotion"]
165
184
  D --> E["Rule Surfaced"]
166
185
  E --> F["Human Feedback"]
167
- F --> G["Posterior Update"]
168
- G --> D
186
+ F --> G["Reward Logged"]
187
+ G -.-> H["Bandit Policy"]
188
+ H -.-> D
169
189
 
170
190
  style F fill:#ff6b6b,color:#fff
171
191
  style G fill:#4ecdc4,color:#fff
192
+ style H fill:#666,color:#fff,stroke-dasharray: 5 5
172
193
  ```
173
194
 
195
+ *Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
196
+
174
197
  ### Stage 1: Capture
175
198
  Document your work. Include the fuckups—they're the most valuable signal.
176
199
 
@@ -269,6 +292,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
269
292
  buildlog gauntlet learn review_issues.json --source "PR#42"
270
293
  ```
271
294
 
295
+ ### Gauntlet Loop (Agent Integration)
296
+
297
+ For AI agents, the gauntlet loop automates the fix-rerun cycle:
298
+
299
+ ```bash
300
+ buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
301
+ ```
302
+
303
+ The loop provides structured checkpoints:
304
+
305
+ | Severity | Action | Human Needed? |
306
+ |----------|--------|---------------|
307
+ | **Critical** | Agent fixes, reruns | No |
308
+ | **Major** | Checkpoint: continue? | Yes |
309
+ | **Minor** | Accept risk or fix? | Yes |
310
+ | **Clean** | Done | No |
311
+
312
+ MCP tools for agent integration:
313
+ - `buildlog_gauntlet_issues` — Report findings, get next action
314
+ - `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
315
+
272
316
  The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
273
317
 
274
318
  ---
@@ -359,6 +403,8 @@ Available tools:
359
403
  | `buildlog_start_session` | Begin tracked experiment |
360
404
  | `buildlog_log_mistake` | Record mistake during session |
361
405
  | `buildlog_experiment_report` | Full experiment report |
406
+ | `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
407
+ | `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
362
408
 
363
409
  ### CLI Commands
364
410
 
@@ -382,6 +428,7 @@ buildlog gauntlet list # Show reviewers
382
428
  buildlog gauntlet rules # Export rules
383
429
  buildlog gauntlet prompt <path> # Generate review prompt
384
430
  buildlog gauntlet learn <file> # Persist learnings
431
+ buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
385
432
  ```
386
433
 
387
434
  ---
@@ -421,21 +468,28 @@ This is how you know. Not vibes. Data.
421
468
 
422
469
  For the technically curious:
423
470
 
424
- | Concept | Application in buildlog |
425
- |---------|------------------------|
426
- | **Thompson Sampling** | Rule selection under uncertainty |
427
- | **Beta-Bernoulli model** | Posterior updates from binary reward |
428
- | **Contextual bandits** | Context-dependent rule selection |
429
- | **Regret bounds** | O(√(KT log K)) theoretical guarantee |
430
- | **Semantic hashing** | Mistake deduplication for RMR |
471
+ | Concept | Application in buildlog | Status |
472
+ |---------|------------------------|--------|
473
+ | **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
474
+ | **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
475
+ | **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
476
+ | **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
477
+ | **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
478
+ | **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
479
+ | **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
431
480
 
432
- We're not inventing new math. We're applying proven frameworks to a new domain.
481
+ We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
433
482
 
434
483
  ---
435
484
 
436
485
  ## Honest Limitations
437
486
 
438
- Things we don't have figured out yet:
487
+ ### Not Yet Implemented
488
+
489
+ - **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
490
+ - **Context-aware surfacing**: Rules are surfaced globally, not based on task context
491
+
492
+ ### Hard Problems We're Working On
439
493
 
440
494
  - **Credit assignment**: When multiple rules are active, which one helped?
441
495
  - **Non-stationarity**: Developer skill changes over time
@@ -75,11 +75,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
75
75
 
76
76
  ## The Mechanism
77
77
 
78
- buildlog uses **contextual bandits** to select which rules to surface.
78
+ buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
79
+
80
+ ### What Exists Today (v0.7)
79
81
 
80
82
  ```
81
83
  ┌─────────────────────────────────────────────────────────────────┐
82
- CONTEXTUAL BANDIT SETUP
84
+ CURRENT INFRASTRUCTURE
85
+ ├─────────────────────────────────────────────────────────────────┤
86
+ │ │
87
+ │ ✅ Rule extraction From entries, reviews, curated seeds │
88
+ │ ✅ Confidence scoring Frequency + recency based │
89
+ │ ✅ Reward logging Accept/reject/revision signals │
90
+ │ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
91
+ │ ✅ Review gauntlet Curated persona-based code review │
92
+ │ ⏳ Manual promotion Human selects rules to surface │
93
+ │ │
94
+ └─────────────────────────────────────────────────────────────────┘
95
+ ```
96
+
97
+ ### What's Coming (v0.8+)
98
+
99
+ ```
100
+ ┌─────────────────────────────────────────────────────────────────┐
101
+ │ CONTEXTUAL BANDIT (PLANNED) │
83
102
  ├─────────────────────────────────────────────────────────────────┤
84
103
  │ │
85
104
  │ Context (c): Error class, file type, task category │
@@ -99,9 +118,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
99
118
 
100
119
  **Reward** = did surfacing this rule actually help?
101
120
 
102
- The system explores (tries uncertain rules) and exploits (uses proven rules) based on accumulated evidence. Thompson Sampling provides theoretical guarantees: O(√(KT log K)) regret bounds.
121
+ The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
103
122
 
104
- This isn't magic. It's a well-understood framework with decades of research. We're applying it to agent rule selection.
123
+ We're building in public—the bandit implementation will be developed with full documentation of the process.
105
124
 
106
125
  ---
107
126
 
@@ -113,16 +132,20 @@ buildlog captures signal at every stage:
113
132
  flowchart LR
114
133
  A["Work Sessions"] --> B["Structured Entries"]
115
134
  B --> C["Extracted Rules"]
116
- C --> D["Bandit Selection"]
135
+ C --> D["Manual Promotion"]
117
136
  D --> E["Rule Surfaced"]
118
137
  E --> F["Human Feedback"]
119
- F --> G["Posterior Update"]
120
- G --> D
138
+ F --> G["Reward Logged"]
139
+ G -.-> H["Bandit Policy"]
140
+ H -.-> D
121
141
 
122
142
  style F fill:#ff6b6b,color:#fff
123
143
  style G fill:#4ecdc4,color:#fff
144
+ style H fill:#666,color:#fff,stroke-dasharray: 5 5
124
145
  ```
125
146
 
147
+ *Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
148
+
126
149
  ### Stage 1: Capture
127
150
  Document your work. Include the fuckups—they're the most valuable signal.
128
151
 
@@ -221,6 +244,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
221
244
  buildlog gauntlet learn review_issues.json --source "PR#42"
222
245
  ```
223
246
 
247
+ ### Gauntlet Loop (Agent Integration)
248
+
249
+ For AI agents, the gauntlet loop automates the fix-rerun cycle:
250
+
251
+ ```bash
252
+ buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
253
+ ```
254
+
255
+ The loop provides structured checkpoints:
256
+
257
+ | Severity | Action | Human Needed? |
258
+ |----------|--------|---------------|
259
+ | **Critical** | Agent fixes, reruns | No |
260
+ | **Major** | Checkpoint: continue? | Yes |
261
+ | **Minor** | Accept risk or fix? | Yes |
262
+ | **Clean** | Done | No |
263
+
264
+ MCP tools for agent integration:
265
+ - `buildlog_gauntlet_issues` — Report findings, get next action
266
+ - `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
267
+
224
268
  The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
225
269
 
226
270
  ---
@@ -311,6 +355,8 @@ Available tools:
311
355
  | `buildlog_start_session` | Begin tracked experiment |
312
356
  | `buildlog_log_mistake` | Record mistake during session |
313
357
  | `buildlog_experiment_report` | Full experiment report |
358
+ | `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
359
+ | `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
314
360
 
315
361
  ### CLI Commands
316
362
 
@@ -334,6 +380,7 @@ buildlog gauntlet list # Show reviewers
334
380
  buildlog gauntlet rules # Export rules
335
381
  buildlog gauntlet prompt <path> # Generate review prompt
336
382
  buildlog gauntlet learn <file> # Persist learnings
383
+ buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
337
384
  ```
338
385
 
339
386
  ---
@@ -373,21 +420,28 @@ This is how you know. Not vibes. Data.
373
420
 
374
421
  For the technically curious:
375
422
 
376
- | Concept | Application in buildlog |
377
- |---------|------------------------|
378
- | **Thompson Sampling** | Rule selection under uncertainty |
379
- | **Beta-Bernoulli model** | Posterior updates from binary reward |
380
- | **Contextual bandits** | Context-dependent rule selection |
381
- | **Regret bounds** | O(√(KT log K)) theoretical guarantee |
382
- | **Semantic hashing** | Mistake deduplication for RMR |
423
+ | Concept | Application in buildlog | Status |
424
+ |---------|------------------------|--------|
425
+ | **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
426
+ | **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
427
+ | **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
428
+ | **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
429
+ | **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
430
+ | **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
431
+ | **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
383
432
 
384
- We're not inventing new math. We're applying proven frameworks to a new domain.
433
+ We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
385
434
 
386
435
  ---
387
436
 
388
437
  ## Honest Limitations
389
438
 
390
- Things we don't have figured out yet:
439
+ ### Not Yet Implemented
440
+
441
+ - **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
442
+ - **Context-aware surfacing**: Rules are surfaced globally, not based on task context
443
+
444
+ ### Hard Problems We're Working On
391
445
 
392
446
  - **Credit assignment**: When multiple rules are active, which one helped?
393
447
  - **Non-stationarity**: Developer skill changes over time
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "buildlog"
7
- version = "0.6.0"
7
+ version = "0.7.0"
8
8
  description = "Engineering notebook for AI-assisted development"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -921,15 +921,18 @@ def gauntlet_list(output_json: bool):
921
921
  """
922
922
  import json as json_module
923
923
 
924
- from buildlog.seeds import load_all_seeds
924
+ from buildlog.seeds import get_default_seeds_dir, load_all_seeds
925
925
 
926
- # Find seeds directory
927
- buildlog_dir = Path("buildlog")
928
- seeds_dir = buildlog_dir / ".buildlog" / "seeds"
926
+ # Find seeds directory (local overrides > buildlog template > package bundled)
927
+ seeds_dir = get_default_seeds_dir()
929
928
 
930
- # Also check .buildlog at repo root (common for installed templates)
931
- if not seeds_dir.exists():
932
- seeds_dir = Path(".buildlog") / "seeds"
929
+ if seeds_dir is None:
930
+ if output_json:
931
+ click.echo('{"personas": {}, "total_rules": 0, "error": "No seeds found"}')
932
+ else:
933
+ click.echo("No seed files found.")
934
+ click.echo("Seeds are bundled with buildlog - check your installation.")
935
+ return
933
936
 
934
937
  seeds = load_all_seeds(seeds_dir)
935
938
 
@@ -997,18 +1000,22 @@ def gauntlet_rules(persona: str, fmt: str, output: str | None):
997
1000
  """
998
1001
  import json as json_module
999
1002
 
1000
- from buildlog.seeds import load_all_seeds
1003
+ from buildlog.seeds import get_default_seeds_dir, load_all_seeds
1001
1004
 
1002
- # Find seeds directory
1003
- seeds_dir = Path(".buildlog") / "seeds"
1004
- if not seeds_dir.exists():
1005
- seeds_dir = Path("buildlog") / ".buildlog" / "seeds"
1005
+ # Find seeds directory (local overrides > buildlog template > package bundled)
1006
+ seeds_dir = get_default_seeds_dir()
1007
+
1008
+ if seeds_dir is None:
1009
+ click.echo("No seed files found.", err=True)
1010
+ click.echo(
1011
+ "Seeds are bundled with buildlog - check your installation.", err=True
1012
+ )
1013
+ raise SystemExit(1)
1006
1014
 
1007
1015
  seeds = load_all_seeds(seeds_dir)
1008
1016
 
1009
1017
  if not seeds:
1010
- click.echo("No seed files found.", err=True)
1011
- click.echo("Initialize with: buildlog init", err=True)
1018
+ click.echo("No seed files found in directory.", err=True)
1012
1019
  raise SystemExit(1)
1013
1020
 
1014
1021
  # Filter personas
@@ -1117,17 +1124,22 @@ def gauntlet_prompt(target: str, persona: tuple[str, ...], output: str | None):
1117
1124
  buildlog gauntlet prompt src/api.py -p security_karen
1118
1125
  buildlog gauntlet prompt . -o review_prompt.md
1119
1126
  """
1120
- from buildlog.seeds import load_all_seeds
1127
+ from buildlog.seeds import get_default_seeds_dir, load_all_seeds
1121
1128
 
1122
- # Find seeds directory
1123
- seeds_dir = Path(".buildlog") / "seeds"
1124
- if not seeds_dir.exists():
1125
- seeds_dir = Path("buildlog") / ".buildlog" / "seeds"
1129
+ # Find seeds directory (local overrides > buildlog template > package bundled)
1130
+ seeds_dir = get_default_seeds_dir()
1131
+
1132
+ if seeds_dir is None:
1133
+ click.echo("No seed files found.", err=True)
1134
+ click.echo(
1135
+ "Seeds are bundled with buildlog - check your installation.", err=True
1136
+ )
1137
+ raise SystemExit(1)
1126
1138
 
1127
1139
  seeds = load_all_seeds(seeds_dir)
1128
1140
 
1129
1141
  if not seeds:
1130
- click.echo("No seed files found.", err=True)
1142
+ click.echo("No seed files found in directory.", err=True)
1131
1143
  raise SystemExit(1)
1132
1144
 
1133
1145
  # Filter personas
@@ -1252,5 +1264,158 @@ def gauntlet_learn(issues_file: str, source: str | None, output_json: bool):
1252
1264
  click.echo(f" Total processed: {result.total_issues_processed}")
1253
1265
 
1254
1266
 
1267
+ @gauntlet.command("loop")
1268
+ @click.argument("target", type=click.Path(exists=True))
1269
+ @click.option(
1270
+ "--persona",
1271
+ "-p",
1272
+ multiple=True,
1273
+ help="Personas to run (default: all)",
1274
+ )
1275
+ @click.option(
1276
+ "--max-iterations",
1277
+ "-n",
1278
+ default=10,
1279
+ help="Maximum iterations to prevent infinite loops (default: 10)",
1280
+ )
1281
+ @click.option(
1282
+ "--stop-at",
1283
+ type=click.Choice(["criticals", "majors", "minors"]),
1284
+ default="minors",
1285
+ help="Stop after clearing this severity level (default: minors)",
1286
+ )
1287
+ @click.option(
1288
+ "--auto-gh-issues",
1289
+ is_flag=True,
1290
+ help="Create GitHub issues for remaining items when accepting risk",
1291
+ )
1292
+ @click.option("--json", "output_json", is_flag=True, help="Output as JSON")
1293
+ def gauntlet_loop(
1294
+ target: str,
1295
+ persona: tuple[str, ...],
1296
+ max_iterations: int,
1297
+ stop_at: str,
1298
+ auto_gh_issues: bool,
1299
+ output_json: bool,
1300
+ ):
1301
+ """Run the gauntlet loop: review, fix, repeat until clean.
1302
+
1303
+ This command orchestrates the gauntlet loop workflow:
1304
+
1305
+ 1. Generate review prompt for target code
1306
+ 2. Process issues and determine action
1307
+ 3. On criticals: output fix instructions, expect re-run
1308
+ 4. On majors only: checkpoint (ask to continue)
1309
+ 5. On minors only: checkpoint (accept risk?)
1310
+ 6. Optionally create GitHub issues for remaining items
1311
+
1312
+ The loop is designed to be run interactively with an agent
1313
+ (Claude Code, Cursor, etc.) that does the actual fixing.
1314
+
1315
+ Examples:
1316
+
1317
+ buildlog gauntlet loop src/
1318
+ buildlog gauntlet loop tests/ --stop-at majors
1319
+ buildlog gauntlet loop . --auto-gh-issues
1320
+ """
1321
+ import json as json_module
1322
+
1323
+ from buildlog.seeds import get_default_seeds_dir, load_all_seeds
1324
+
1325
+ # Find seeds directory
1326
+ seeds_dir = get_default_seeds_dir()
1327
+
1328
+ if seeds_dir is None:
1329
+ click.echo("No seed files found.", err=True)
1330
+ raise SystemExit(1)
1331
+
1332
+ seeds = load_all_seeds(seeds_dir)
1333
+
1334
+ if not seeds:
1335
+ click.echo("No seed files found in directory.", err=True)
1336
+ raise SystemExit(1)
1337
+
1338
+ # Filter personas
1339
+ if persona:
1340
+ seeds = {k: v for k, v in seeds.items() if k in persona}
1341
+ if not seeds:
1342
+ click.echo(f"No matching personas: {', '.join(persona)}", err=True)
1343
+ raise SystemExit(1)
1344
+
1345
+ target_path = Path(target)
1346
+
1347
+ # Generate persona rules summary
1348
+ rules_by_persona: dict[str, list[dict[str, str]]] = {}
1349
+ for name, sf in seeds.items():
1350
+ rules_by_persona[name] = [
1351
+ {"rule": r.rule, "antipattern": r.antipattern, "category": r.category}
1352
+ for r in sf.rules
1353
+ ]
1354
+
1355
+ # Loop instructions
1356
+ instructions = [
1357
+ "1. Review the target code using the rules from each persona",
1358
+ "2. Report all violations as JSON issues with: severity, category, description, rule_learned, location",
1359
+ "3. Call `buildlog_gauntlet_issues` with the issues list to determine next action",
1360
+ "4. If action='fix_criticals': Fix critical+major issues, then re-run gauntlet",
1361
+ "5. If action='checkpoint_majors': Ask user whether to continue fixing majors",
1362
+ "6. If action='checkpoint_minors': Ask user whether to accept risk or continue",
1363
+ "7. If user accepts risk and --auto-gh-issues: Call `buildlog_gauntlet_accept_risk` with remaining issues",
1364
+ "8. Repeat until action='clean' or max_iterations reached",
1365
+ ]
1366
+
1367
+ # Expected issue format
1368
+ issue_format = {
1369
+ "severity": "critical|major|minor|nitpick",
1370
+ "category": "security|testing|architectural|workflow|...",
1371
+ "description": "Concrete description of what's wrong",
1372
+ "rule_learned": "Generalizable rule for the future",
1373
+ "location": "file:line (optional)",
1374
+ }
1375
+
1376
+ # Build the loop output
1377
+ output = {
1378
+ "command": "gauntlet_loop",
1379
+ "target": str(target_path),
1380
+ "personas": list(seeds.keys()),
1381
+ "max_iterations": max_iterations,
1382
+ "stop_at": stop_at,
1383
+ "auto_gh_issues": auto_gh_issues,
1384
+ "rules_by_persona": rules_by_persona,
1385
+ "instructions": instructions,
1386
+ "issue_format": issue_format,
1387
+ }
1388
+
1389
+ if output_json:
1390
+ click.echo(json_module.dumps(output, indent=2))
1391
+ else:
1392
+ # Human-readable output
1393
+ click.echo("=" * 60)
1394
+ click.echo("GAUNTLET LOOP")
1395
+ click.echo("=" * 60)
1396
+ click.echo(f"\nTarget: {target_path}")
1397
+ click.echo(f"Personas: {', '.join(seeds.keys())}")
1398
+ click.echo(f"Max iterations: {max_iterations}")
1399
+ click.echo(f"Stop at: {stop_at}")
1400
+ click.echo(f"Auto GH issues: {auto_gh_issues}")
1401
+
1402
+ click.echo("\n--- RULES ---")
1403
+ for name, rules in rules_by_persona.items():
1404
+ click.echo(f"\n## {name.replace('_', ' ').title()}")
1405
+ for r in rules:
1406
+ click.echo(f" • {r['rule']}")
1407
+
1408
+ click.echo("\n--- LOOP WORKFLOW ---")
1409
+ for instruction in instructions:
1410
+ click.echo(f" {instruction}")
1411
+
1412
+ click.echo("\n--- ISSUE FORMAT ---")
1413
+ click.echo(json_module.dumps(issue_format, indent=2))
1414
+
1415
+ click.echo("\n" + "=" * 60)
1416
+ click.echo("Ready. Run gauntlet review and process issues.")
1417
+ click.echo("=" * 60)
1418
+
1419
+
1255
1420
  if __name__ == "__main__":
1256
1421
  main()
@@ -3,6 +3,8 @@
3
3
  from buildlog.core.operations import (
4
4
  DiffResult,
5
5
  EndSessionResult,
6
+ GauntletAcceptRiskResult,
7
+ GauntletLoopResult,
6
8
  LearnFromReviewResult,
7
9
  LogMistakeResult,
8
10
  LogRewardResult,
@@ -20,6 +22,8 @@ from buildlog.core.operations import (
20
22
  diff,
21
23
  end_session,
22
24
  find_skills_by_ids,
25
+ gauntlet_accept_risk,
26
+ gauntlet_process_issues,
23
27
  get_experiment_report,
24
28
  get_rewards,
25
29
  get_session_metrics,
@@ -50,6 +54,9 @@ __all__ = [
50
54
  "StartSessionResult",
51
55
  "EndSessionResult",
52
56
  "LogMistakeResult",
57
+ # Gauntlet loop
58
+ "GauntletLoopResult",
59
+ "GauntletAcceptRiskResult",
53
60
  "status",
54
61
  "promote",
55
62
  "reject",
@@ -64,4 +71,7 @@ __all__ = [
64
71
  "log_mistake",
65
72
  "get_session_metrics",
66
73
  "get_experiment_report",
74
+ # Gauntlet loop operations
75
+ "gauntlet_process_issues",
76
+ "gauntlet_accept_risk",
67
77
  ]