buildlog 0.6.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {buildlog-0.6.0 → buildlog-0.7.0}/.gitignore +8 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/PKG-INFO +71 -17
- {buildlog-0.6.0 → buildlog-0.7.0}/README.md +70 -16
- {buildlog-0.6.0 → buildlog-0.7.0}/pyproject.toml +1 -1
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/cli.py +185 -20
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/core/__init__.py +10 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/core/operations.py +234 -0
- buildlog-0.7.0/src/buildlog/data/seeds/security_karen.yaml +162 -0
- buildlog-0.7.0/src/buildlog/data/seeds/test_terrorist.yaml +280 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/server.py +6 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/tools.py +105 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/claude_md.py +17 -4
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/tracking.py +20 -1
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seeds.py +91 -0
- buildlog-0.7.0/template/buildlog/assets/.gitkeep +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/LICENSE +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/copier.yml +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/post_gen.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/__init__.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/confidence.py +0 -0
- /buildlog-0.6.0/template/buildlog/.gitkeep → /buildlog-0.7.0/src/buildlog/data/__init__.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/distill.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/embeddings.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/mcp/__init__.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/__init__.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/base.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/settings_json.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/render/skill.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/__init__.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/categorizers.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/extractors.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/generators.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/models.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/pipeline.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/seed_engine/sources.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/skills.py +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/src/buildlog/stats.py +0 -0
- {buildlog-0.6.0/template/buildlog/assets → buildlog-0.7.0/template/buildlog}/.gitkeep +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/2026-01-01-example.md +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
- {buildlog-0.6.0 → buildlog-0.7.0}/template/buildlog/_TEMPLATE.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: buildlog
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Engineering notebook for AI-assisted development
|
|
5
5
|
Project-URL: Homepage, https://github.com/Peleke/buildlog-template
|
|
6
6
|
Project-URL: Repository, https://github.com/Peleke/buildlog-template
|
|
@@ -123,11 +123,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
|
|
|
123
123
|
|
|
124
124
|
## The Mechanism
|
|
125
125
|
|
|
126
|
-
buildlog uses **contextual bandits** to select which rules to surface.
|
|
126
|
+
buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
|
|
127
|
+
|
|
128
|
+
### What Exists Today (v0.7)
|
|
127
129
|
|
|
128
130
|
```
|
|
129
131
|
┌─────────────────────────────────────────────────────────────────┐
|
|
130
|
-
│
|
|
132
|
+
│ CURRENT INFRASTRUCTURE │
|
|
133
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
134
|
+
│ │
|
|
135
|
+
│ ✅ Rule extraction From entries, reviews, curated seeds │
|
|
136
|
+
│ ✅ Confidence scoring Frequency + recency based │
|
|
137
|
+
│ ✅ Reward logging Accept/reject/revision signals │
|
|
138
|
+
│ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
|
|
139
|
+
│ ✅ Review gauntlet Curated persona-based code review │
|
|
140
|
+
│ ⏳ Manual promotion Human selects rules to surface │
|
|
141
|
+
│ │
|
|
142
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### What's Coming (v0.8+)
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
149
|
+
│ CONTEXTUAL BANDIT (PLANNED) │
|
|
131
150
|
├─────────────────────────────────────────────────────────────────┤
|
|
132
151
|
│ │
|
|
133
152
|
│ Context (c): Error class, file type, task category │
|
|
@@ -147,9 +166,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
|
|
|
147
166
|
|
|
148
167
|
**Reward** = did surfacing this rule actually help?
|
|
149
168
|
|
|
150
|
-
The
|
|
169
|
+
The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
|
|
151
170
|
|
|
152
|
-
|
|
171
|
+
We're building in public—the bandit implementation will be developed with full documentation of the process.
|
|
153
172
|
|
|
154
173
|
---
|
|
155
174
|
|
|
@@ -161,16 +180,20 @@ buildlog captures signal at every stage:
|
|
|
161
180
|
flowchart LR
|
|
162
181
|
A["Work Sessions"] --> B["Structured Entries"]
|
|
163
182
|
B --> C["Extracted Rules"]
|
|
164
|
-
C --> D["
|
|
183
|
+
C --> D["Manual Promotion"]
|
|
165
184
|
D --> E["Rule Surfaced"]
|
|
166
185
|
E --> F["Human Feedback"]
|
|
167
|
-
F --> G["
|
|
168
|
-
G
|
|
186
|
+
F --> G["Reward Logged"]
|
|
187
|
+
G -.-> H["Bandit Policy"]
|
|
188
|
+
H -.-> D
|
|
169
189
|
|
|
170
190
|
style F fill:#ff6b6b,color:#fff
|
|
171
191
|
style G fill:#4ecdc4,color:#fff
|
|
192
|
+
style H fill:#666,color:#fff,stroke-dasharray: 5 5
|
|
172
193
|
```
|
|
173
194
|
|
|
195
|
+
*Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
|
|
196
|
+
|
|
174
197
|
### Stage 1: Capture
|
|
175
198
|
Document your work. Include the fuckups—they're the most valuable signal.
|
|
176
199
|
|
|
@@ -269,6 +292,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
|
|
|
269
292
|
buildlog gauntlet learn review_issues.json --source "PR#42"
|
|
270
293
|
```
|
|
271
294
|
|
|
295
|
+
### Gauntlet Loop (Agent Integration)
|
|
296
|
+
|
|
297
|
+
For AI agents, the gauntlet loop automates the fix-rerun cycle:
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
The loop provides structured checkpoints:
|
|
304
|
+
|
|
305
|
+
| Severity | Action | Human Needed? |
|
|
306
|
+
|----------|--------|---------------|
|
|
307
|
+
| **Critical** | Agent fixes, reruns | No |
|
|
308
|
+
| **Major** | Checkpoint: continue? | Yes |
|
|
309
|
+
| **Minor** | Accept risk or fix? | Yes |
|
|
310
|
+
| **Clean** | Done | No |
|
|
311
|
+
|
|
312
|
+
MCP tools for agent integration:
|
|
313
|
+
- `buildlog_gauntlet_issues` — Report findings, get next action
|
|
314
|
+
- `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
|
|
315
|
+
|
|
272
316
|
The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
|
|
273
317
|
|
|
274
318
|
---
|
|
@@ -359,6 +403,8 @@ Available tools:
|
|
|
359
403
|
| `buildlog_start_session` | Begin tracked experiment |
|
|
360
404
|
| `buildlog_log_mistake` | Record mistake during session |
|
|
361
405
|
| `buildlog_experiment_report` | Full experiment report |
|
|
406
|
+
| `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
|
|
407
|
+
| `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
|
|
362
408
|
|
|
363
409
|
### CLI Commands
|
|
364
410
|
|
|
@@ -382,6 +428,7 @@ buildlog gauntlet list # Show reviewers
|
|
|
382
428
|
buildlog gauntlet rules # Export rules
|
|
383
429
|
buildlog gauntlet prompt <path> # Generate review prompt
|
|
384
430
|
buildlog gauntlet learn <file> # Persist learnings
|
|
431
|
+
buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
|
|
385
432
|
```
|
|
386
433
|
|
|
387
434
|
---
|
|
@@ -421,21 +468,28 @@ This is how you know. Not vibes. Data.
|
|
|
421
468
|
|
|
422
469
|
For the technically curious:
|
|
423
470
|
|
|
424
|
-
| Concept | Application in buildlog |
|
|
425
|
-
|
|
426
|
-
| **
|
|
427
|
-
| **
|
|
428
|
-
| **
|
|
429
|
-
| **
|
|
430
|
-
| **
|
|
471
|
+
| Concept | Application in buildlog | Status |
|
|
472
|
+
|---------|------------------------|--------|
|
|
473
|
+
| **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
|
|
474
|
+
| **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
|
|
475
|
+
| **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
|
|
476
|
+
| **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
|
|
477
|
+
| **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
|
|
478
|
+
| **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
|
|
479
|
+
| **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
|
|
431
480
|
|
|
432
|
-
We're not inventing new math. We're applying proven frameworks to a new domain.
|
|
481
|
+
We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
|
|
433
482
|
|
|
434
483
|
---
|
|
435
484
|
|
|
436
485
|
## Honest Limitations
|
|
437
486
|
|
|
438
|
-
|
|
487
|
+
### Not Yet Implemented
|
|
488
|
+
|
|
489
|
+
- **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
|
|
490
|
+
- **Context-aware surfacing**: Rules are surfaced globally, not based on task context
|
|
491
|
+
|
|
492
|
+
### Hard Problems We're Working On
|
|
439
493
|
|
|
440
494
|
- **Credit assignment**: When multiple rules are active, which one helped?
|
|
441
495
|
- **Non-stationarity**: Developer skill changes over time
|
|
@@ -75,11 +75,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
|
|
|
75
75
|
|
|
76
76
|
## The Mechanism
|
|
77
77
|
|
|
78
|
-
buildlog uses **contextual bandits** to select which rules to surface.
|
|
78
|
+
buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
|
|
79
|
+
|
|
80
|
+
### What Exists Today (v0.7)
|
|
79
81
|
|
|
80
82
|
```
|
|
81
83
|
┌─────────────────────────────────────────────────────────────────┐
|
|
82
|
-
│
|
|
84
|
+
│ CURRENT INFRASTRUCTURE │
|
|
85
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
86
|
+
│ │
|
|
87
|
+
│ ✅ Rule extraction From entries, reviews, curated seeds │
|
|
88
|
+
│ ✅ Confidence scoring Frequency + recency based │
|
|
89
|
+
│ ✅ Reward logging Accept/reject/revision signals │
|
|
90
|
+
│ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
|
|
91
|
+
│ ✅ Review gauntlet Curated persona-based code review │
|
|
92
|
+
│ ⏳ Manual promotion Human selects rules to surface │
|
|
93
|
+
│ │
|
|
94
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### What's Coming (v0.8+)
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
101
|
+
│ CONTEXTUAL BANDIT (PLANNED) │
|
|
83
102
|
├─────────────────────────────────────────────────────────────────┤
|
|
84
103
|
│ │
|
|
85
104
|
│ Context (c): Error class, file type, task category │
|
|
@@ -99,9 +118,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
|
|
|
99
118
|
|
|
100
119
|
**Reward** = did surfacing this rule actually help?
|
|
101
120
|
|
|
102
|
-
The
|
|
121
|
+
The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
|
|
103
122
|
|
|
104
|
-
|
|
123
|
+
We're building in public—the bandit implementation will be developed with full documentation of the process.
|
|
105
124
|
|
|
106
125
|
---
|
|
107
126
|
|
|
@@ -113,16 +132,20 @@ buildlog captures signal at every stage:
|
|
|
113
132
|
flowchart LR
|
|
114
133
|
A["Work Sessions"] --> B["Structured Entries"]
|
|
115
134
|
B --> C["Extracted Rules"]
|
|
116
|
-
C --> D["
|
|
135
|
+
C --> D["Manual Promotion"]
|
|
117
136
|
D --> E["Rule Surfaced"]
|
|
118
137
|
E --> F["Human Feedback"]
|
|
119
|
-
F --> G["
|
|
120
|
-
G
|
|
138
|
+
F --> G["Reward Logged"]
|
|
139
|
+
G -.-> H["Bandit Policy"]
|
|
140
|
+
H -.-> D
|
|
121
141
|
|
|
122
142
|
style F fill:#ff6b6b,color:#fff
|
|
123
143
|
style G fill:#4ecdc4,color:#fff
|
|
144
|
+
style H fill:#666,color:#fff,stroke-dasharray: 5 5
|
|
124
145
|
```
|
|
125
146
|
|
|
147
|
+
*Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
|
|
148
|
+
|
|
126
149
|
### Stage 1: Capture
|
|
127
150
|
Document your work. Include the fuckups—they're the most valuable signal.
|
|
128
151
|
|
|
@@ -221,6 +244,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
|
|
|
221
244
|
buildlog gauntlet learn review_issues.json --source "PR#42"
|
|
222
245
|
```
|
|
223
246
|
|
|
247
|
+
### Gauntlet Loop (Agent Integration)
|
|
248
|
+
|
|
249
|
+
For AI agents, the gauntlet loop automates the fix-rerun cycle:
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
The loop provides structured checkpoints:
|
|
256
|
+
|
|
257
|
+
| Severity | Action | Human Needed? |
|
|
258
|
+
|----------|--------|---------------|
|
|
259
|
+
| **Critical** | Agent fixes, reruns | No |
|
|
260
|
+
| **Major** | Checkpoint: continue? | Yes |
|
|
261
|
+
| **Minor** | Accept risk or fix? | Yes |
|
|
262
|
+
| **Clean** | Done | No |
|
|
263
|
+
|
|
264
|
+
MCP tools for agent integration:
|
|
265
|
+
- `buildlog_gauntlet_issues` — Report findings, get next action
|
|
266
|
+
- `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
|
|
267
|
+
|
|
224
268
|
The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
|
|
225
269
|
|
|
226
270
|
---
|
|
@@ -311,6 +355,8 @@ Available tools:
|
|
|
311
355
|
| `buildlog_start_session` | Begin tracked experiment |
|
|
312
356
|
| `buildlog_log_mistake` | Record mistake during session |
|
|
313
357
|
| `buildlog_experiment_report` | Full experiment report |
|
|
358
|
+
| `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
|
|
359
|
+
| `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
|
|
314
360
|
|
|
315
361
|
### CLI Commands
|
|
316
362
|
|
|
@@ -334,6 +380,7 @@ buildlog gauntlet list # Show reviewers
|
|
|
334
380
|
buildlog gauntlet rules # Export rules
|
|
335
381
|
buildlog gauntlet prompt <path> # Generate review prompt
|
|
336
382
|
buildlog gauntlet learn <file> # Persist learnings
|
|
383
|
+
buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
|
|
337
384
|
```
|
|
338
385
|
|
|
339
386
|
---
|
|
@@ -373,21 +420,28 @@ This is how you know. Not vibes. Data.
|
|
|
373
420
|
|
|
374
421
|
For the technically curious:
|
|
375
422
|
|
|
376
|
-
| Concept | Application in buildlog |
|
|
377
|
-
|
|
378
|
-
| **
|
|
379
|
-
| **
|
|
380
|
-
| **
|
|
381
|
-
| **
|
|
382
|
-
| **
|
|
423
|
+
| Concept | Application in buildlog | Status |
|
|
424
|
+
|---------|------------------------|--------|
|
|
425
|
+
| **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
|
|
426
|
+
| **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
|
|
427
|
+
| **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
|
|
428
|
+
| **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
|
|
429
|
+
| **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
|
|
430
|
+
| **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
|
|
431
|
+
| **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
|
|
383
432
|
|
|
384
|
-
We're not inventing new math. We're applying proven frameworks to a new domain.
|
|
433
|
+
We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
|
|
385
434
|
|
|
386
435
|
---
|
|
387
436
|
|
|
388
437
|
## Honest Limitations
|
|
389
438
|
|
|
390
|
-
|
|
439
|
+
### Not Yet Implemented
|
|
440
|
+
|
|
441
|
+
- **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
|
|
442
|
+
- **Context-aware surfacing**: Rules are surfaced globally, not based on task context
|
|
443
|
+
|
|
444
|
+
### Hard Problems We're Working On
|
|
391
445
|
|
|
392
446
|
- **Credit assignment**: When multiple rules are active, which one helped?
|
|
393
447
|
- **Non-stationarity**: Developer skill changes over time
|
|
@@ -921,15 +921,18 @@ def gauntlet_list(output_json: bool):
|
|
|
921
921
|
"""
|
|
922
922
|
import json as json_module
|
|
923
923
|
|
|
924
|
-
from buildlog.seeds import load_all_seeds
|
|
924
|
+
from buildlog.seeds import get_default_seeds_dir, load_all_seeds
|
|
925
925
|
|
|
926
|
-
# Find seeds directory
|
|
927
|
-
|
|
928
|
-
seeds_dir = buildlog_dir / ".buildlog" / "seeds"
|
|
926
|
+
# Find seeds directory (local overrides > buildlog template > package bundled)
|
|
927
|
+
seeds_dir = get_default_seeds_dir()
|
|
929
928
|
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
929
|
+
if seeds_dir is None:
|
|
930
|
+
if output_json:
|
|
931
|
+
click.echo('{"personas": {}, "total_rules": 0, "error": "No seeds found"}')
|
|
932
|
+
else:
|
|
933
|
+
click.echo("No seed files found.")
|
|
934
|
+
click.echo("Seeds are bundled with buildlog - check your installation.")
|
|
935
|
+
return
|
|
933
936
|
|
|
934
937
|
seeds = load_all_seeds(seeds_dir)
|
|
935
938
|
|
|
@@ -997,18 +1000,22 @@ def gauntlet_rules(persona: str, fmt: str, output: str | None):
|
|
|
997
1000
|
"""
|
|
998
1001
|
import json as json_module
|
|
999
1002
|
|
|
1000
|
-
from buildlog.seeds import load_all_seeds
|
|
1003
|
+
from buildlog.seeds import get_default_seeds_dir, load_all_seeds
|
|
1001
1004
|
|
|
1002
|
-
# Find seeds directory
|
|
1003
|
-
seeds_dir =
|
|
1004
|
-
|
|
1005
|
-
|
|
1005
|
+
# Find seeds directory (local overrides > buildlog template > package bundled)
|
|
1006
|
+
seeds_dir = get_default_seeds_dir()
|
|
1007
|
+
|
|
1008
|
+
if seeds_dir is None:
|
|
1009
|
+
click.echo("No seed files found.", err=True)
|
|
1010
|
+
click.echo(
|
|
1011
|
+
"Seeds are bundled with buildlog - check your installation.", err=True
|
|
1012
|
+
)
|
|
1013
|
+
raise SystemExit(1)
|
|
1006
1014
|
|
|
1007
1015
|
seeds = load_all_seeds(seeds_dir)
|
|
1008
1016
|
|
|
1009
1017
|
if not seeds:
|
|
1010
|
-
click.echo("No seed files found.", err=True)
|
|
1011
|
-
click.echo("Initialize with: buildlog init", err=True)
|
|
1018
|
+
click.echo("No seed files found in directory.", err=True)
|
|
1012
1019
|
raise SystemExit(1)
|
|
1013
1020
|
|
|
1014
1021
|
# Filter personas
|
|
@@ -1117,17 +1124,22 @@ def gauntlet_prompt(target: str, persona: tuple[str, ...], output: str | None):
|
|
|
1117
1124
|
buildlog gauntlet prompt src/api.py -p security_karen
|
|
1118
1125
|
buildlog gauntlet prompt . -o review_prompt.md
|
|
1119
1126
|
"""
|
|
1120
|
-
from buildlog.seeds import load_all_seeds
|
|
1127
|
+
from buildlog.seeds import get_default_seeds_dir, load_all_seeds
|
|
1121
1128
|
|
|
1122
|
-
# Find seeds directory
|
|
1123
|
-
seeds_dir =
|
|
1124
|
-
|
|
1125
|
-
|
|
1129
|
+
# Find seeds directory (local overrides > buildlog template > package bundled)
|
|
1130
|
+
seeds_dir = get_default_seeds_dir()
|
|
1131
|
+
|
|
1132
|
+
if seeds_dir is None:
|
|
1133
|
+
click.echo("No seed files found.", err=True)
|
|
1134
|
+
click.echo(
|
|
1135
|
+
"Seeds are bundled with buildlog - check your installation.", err=True
|
|
1136
|
+
)
|
|
1137
|
+
raise SystemExit(1)
|
|
1126
1138
|
|
|
1127
1139
|
seeds = load_all_seeds(seeds_dir)
|
|
1128
1140
|
|
|
1129
1141
|
if not seeds:
|
|
1130
|
-
click.echo("No seed files found.", err=True)
|
|
1142
|
+
click.echo("No seed files found in directory.", err=True)
|
|
1131
1143
|
raise SystemExit(1)
|
|
1132
1144
|
|
|
1133
1145
|
# Filter personas
|
|
@@ -1252,5 +1264,158 @@ def gauntlet_learn(issues_file: str, source: str | None, output_json: bool):
|
|
|
1252
1264
|
click.echo(f" Total processed: {result.total_issues_processed}")
|
|
1253
1265
|
|
|
1254
1266
|
|
|
1267
|
+
@gauntlet.command("loop")
|
|
1268
|
+
@click.argument("target", type=click.Path(exists=True))
|
|
1269
|
+
@click.option(
|
|
1270
|
+
"--persona",
|
|
1271
|
+
"-p",
|
|
1272
|
+
multiple=True,
|
|
1273
|
+
help="Personas to run (default: all)",
|
|
1274
|
+
)
|
|
1275
|
+
@click.option(
|
|
1276
|
+
"--max-iterations",
|
|
1277
|
+
"-n",
|
|
1278
|
+
default=10,
|
|
1279
|
+
help="Maximum iterations to prevent infinite loops (default: 10)",
|
|
1280
|
+
)
|
|
1281
|
+
@click.option(
|
|
1282
|
+
"--stop-at",
|
|
1283
|
+
type=click.Choice(["criticals", "majors", "minors"]),
|
|
1284
|
+
default="minors",
|
|
1285
|
+
help="Stop after clearing this severity level (default: minors)",
|
|
1286
|
+
)
|
|
1287
|
+
@click.option(
|
|
1288
|
+
"--auto-gh-issues",
|
|
1289
|
+
is_flag=True,
|
|
1290
|
+
help="Create GitHub issues for remaining items when accepting risk",
|
|
1291
|
+
)
|
|
1292
|
+
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
1293
|
+
def gauntlet_loop(
|
|
1294
|
+
target: str,
|
|
1295
|
+
persona: tuple[str, ...],
|
|
1296
|
+
max_iterations: int,
|
|
1297
|
+
stop_at: str,
|
|
1298
|
+
auto_gh_issues: bool,
|
|
1299
|
+
output_json: bool,
|
|
1300
|
+
):
|
|
1301
|
+
"""Run the gauntlet loop: review, fix, repeat until clean.
|
|
1302
|
+
|
|
1303
|
+
This command orchestrates the gauntlet loop workflow:
|
|
1304
|
+
|
|
1305
|
+
1. Generate review prompt for target code
|
|
1306
|
+
2. Process issues and determine action
|
|
1307
|
+
3. On criticals: output fix instructions, expect re-run
|
|
1308
|
+
4. On majors only: checkpoint (ask to continue)
|
|
1309
|
+
5. On minors only: checkpoint (accept risk?)
|
|
1310
|
+
6. Optionally create GitHub issues for remaining items
|
|
1311
|
+
|
|
1312
|
+
The loop is designed to be run interactively with an agent
|
|
1313
|
+
(Claude Code, Cursor, etc.) that does the actual fixing.
|
|
1314
|
+
|
|
1315
|
+
Examples:
|
|
1316
|
+
|
|
1317
|
+
buildlog gauntlet loop src/
|
|
1318
|
+
buildlog gauntlet loop tests/ --stop-at majors
|
|
1319
|
+
buildlog gauntlet loop . --auto-gh-issues
|
|
1320
|
+
"""
|
|
1321
|
+
import json as json_module
|
|
1322
|
+
|
|
1323
|
+
from buildlog.seeds import get_default_seeds_dir, load_all_seeds
|
|
1324
|
+
|
|
1325
|
+
# Find seeds directory
|
|
1326
|
+
seeds_dir = get_default_seeds_dir()
|
|
1327
|
+
|
|
1328
|
+
if seeds_dir is None:
|
|
1329
|
+
click.echo("No seed files found.", err=True)
|
|
1330
|
+
raise SystemExit(1)
|
|
1331
|
+
|
|
1332
|
+
seeds = load_all_seeds(seeds_dir)
|
|
1333
|
+
|
|
1334
|
+
if not seeds:
|
|
1335
|
+
click.echo("No seed files found in directory.", err=True)
|
|
1336
|
+
raise SystemExit(1)
|
|
1337
|
+
|
|
1338
|
+
# Filter personas
|
|
1339
|
+
if persona:
|
|
1340
|
+
seeds = {k: v for k, v in seeds.items() if k in persona}
|
|
1341
|
+
if not seeds:
|
|
1342
|
+
click.echo(f"No matching personas: {', '.join(persona)}", err=True)
|
|
1343
|
+
raise SystemExit(1)
|
|
1344
|
+
|
|
1345
|
+
target_path = Path(target)
|
|
1346
|
+
|
|
1347
|
+
# Generate persona rules summary
|
|
1348
|
+
rules_by_persona: dict[str, list[dict[str, str]]] = {}
|
|
1349
|
+
for name, sf in seeds.items():
|
|
1350
|
+
rules_by_persona[name] = [
|
|
1351
|
+
{"rule": r.rule, "antipattern": r.antipattern, "category": r.category}
|
|
1352
|
+
for r in sf.rules
|
|
1353
|
+
]
|
|
1354
|
+
|
|
1355
|
+
# Loop instructions
|
|
1356
|
+
instructions = [
|
|
1357
|
+
"1. Review the target code using the rules from each persona",
|
|
1358
|
+
"2. Report all violations as JSON issues with: severity, category, description, rule_learned, location",
|
|
1359
|
+
"3. Call `buildlog_gauntlet_issues` with the issues list to determine next action",
|
|
1360
|
+
"4. If action='fix_criticals': Fix critical+major issues, then re-run gauntlet",
|
|
1361
|
+
"5. If action='checkpoint_majors': Ask user whether to continue fixing majors",
|
|
1362
|
+
"6. If action='checkpoint_minors': Ask user whether to accept risk or continue",
|
|
1363
|
+
"7. If user accepts risk and --auto-gh-issues: Call `buildlog_gauntlet_accept_risk` with remaining issues",
|
|
1364
|
+
"8. Repeat until action='clean' or max_iterations reached",
|
|
1365
|
+
]
|
|
1366
|
+
|
|
1367
|
+
# Expected issue format
|
|
1368
|
+
issue_format = {
|
|
1369
|
+
"severity": "critical|major|minor|nitpick",
|
|
1370
|
+
"category": "security|testing|architectural|workflow|...",
|
|
1371
|
+
"description": "Concrete description of what's wrong",
|
|
1372
|
+
"rule_learned": "Generalizable rule for the future",
|
|
1373
|
+
"location": "file:line (optional)",
|
|
1374
|
+
}
|
|
1375
|
+
|
|
1376
|
+
# Build the loop output
|
|
1377
|
+
output = {
|
|
1378
|
+
"command": "gauntlet_loop",
|
|
1379
|
+
"target": str(target_path),
|
|
1380
|
+
"personas": list(seeds.keys()),
|
|
1381
|
+
"max_iterations": max_iterations,
|
|
1382
|
+
"stop_at": stop_at,
|
|
1383
|
+
"auto_gh_issues": auto_gh_issues,
|
|
1384
|
+
"rules_by_persona": rules_by_persona,
|
|
1385
|
+
"instructions": instructions,
|
|
1386
|
+
"issue_format": issue_format,
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1389
|
+
if output_json:
|
|
1390
|
+
click.echo(json_module.dumps(output, indent=2))
|
|
1391
|
+
else:
|
|
1392
|
+
# Human-readable output
|
|
1393
|
+
click.echo("=" * 60)
|
|
1394
|
+
click.echo("GAUNTLET LOOP")
|
|
1395
|
+
click.echo("=" * 60)
|
|
1396
|
+
click.echo(f"\nTarget: {target_path}")
|
|
1397
|
+
click.echo(f"Personas: {', '.join(seeds.keys())}")
|
|
1398
|
+
click.echo(f"Max iterations: {max_iterations}")
|
|
1399
|
+
click.echo(f"Stop at: {stop_at}")
|
|
1400
|
+
click.echo(f"Auto GH issues: {auto_gh_issues}")
|
|
1401
|
+
|
|
1402
|
+
click.echo("\n--- RULES ---")
|
|
1403
|
+
for name, rules in rules_by_persona.items():
|
|
1404
|
+
click.echo(f"\n## {name.replace('_', ' ').title()}")
|
|
1405
|
+
for r in rules:
|
|
1406
|
+
click.echo(f" • {r['rule']}")
|
|
1407
|
+
|
|
1408
|
+
click.echo("\n--- LOOP WORKFLOW ---")
|
|
1409
|
+
for instruction in instructions:
|
|
1410
|
+
click.echo(f" {instruction}")
|
|
1411
|
+
|
|
1412
|
+
click.echo("\n--- ISSUE FORMAT ---")
|
|
1413
|
+
click.echo(json_module.dumps(issue_format, indent=2))
|
|
1414
|
+
|
|
1415
|
+
click.echo("\n" + "=" * 60)
|
|
1416
|
+
click.echo("Ready. Run gauntlet review and process issues.")
|
|
1417
|
+
click.echo("=" * 60)
|
|
1418
|
+
|
|
1419
|
+
|
|
1255
1420
|
if __name__ == "__main__":
|
|
1256
1421
|
main()
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
from buildlog.core.operations import (
|
|
4
4
|
DiffResult,
|
|
5
5
|
EndSessionResult,
|
|
6
|
+
GauntletAcceptRiskResult,
|
|
7
|
+
GauntletLoopResult,
|
|
6
8
|
LearnFromReviewResult,
|
|
7
9
|
LogMistakeResult,
|
|
8
10
|
LogRewardResult,
|
|
@@ -20,6 +22,8 @@ from buildlog.core.operations import (
|
|
|
20
22
|
diff,
|
|
21
23
|
end_session,
|
|
22
24
|
find_skills_by_ids,
|
|
25
|
+
gauntlet_accept_risk,
|
|
26
|
+
gauntlet_process_issues,
|
|
23
27
|
get_experiment_report,
|
|
24
28
|
get_rewards,
|
|
25
29
|
get_session_metrics,
|
|
@@ -50,6 +54,9 @@ __all__ = [
|
|
|
50
54
|
"StartSessionResult",
|
|
51
55
|
"EndSessionResult",
|
|
52
56
|
"LogMistakeResult",
|
|
57
|
+
# Gauntlet loop
|
|
58
|
+
"GauntletLoopResult",
|
|
59
|
+
"GauntletAcceptRiskResult",
|
|
53
60
|
"status",
|
|
54
61
|
"promote",
|
|
55
62
|
"reject",
|
|
@@ -64,4 +71,7 @@ __all__ = [
|
|
|
64
71
|
"log_mistake",
|
|
65
72
|
"get_session_metrics",
|
|
66
73
|
"get_experiment_report",
|
|
74
|
+
# Gauntlet loop operations
|
|
75
|
+
"gauntlet_process_issues",
|
|
76
|
+
"gauntlet_accept_risk",
|
|
67
77
|
]
|