buildlog 0.6.1__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {buildlog-0.6.1 → buildlog-0.7.0}/.gitignore +8 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/PKG-INFO +71 -17
- {buildlog-0.6.1 → buildlog-0.7.0}/README.md +70 -16
- {buildlog-0.6.1 → buildlog-0.7.0}/pyproject.toml +1 -1
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/cli.py +153 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/core/__init__.py +10 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/core/operations.py +234 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/mcp/server.py +6 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/mcp/tools.py +105 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/claude_md.py +17 -4
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/tracking.py +20 -1
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seeds.py +41 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/LICENSE +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/copier.yml +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/post_gen.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/__init__.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/confidence.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/data/__init__.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/data/seeds/security_karen.yaml +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/data/seeds/test_terrorist.yaml +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/distill.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/embeddings.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/mcp/__init__.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/__init__.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/base.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/settings_json.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/render/skill.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/__init__.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/categorizers.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/extractors.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/generators.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/models.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/pipeline.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/seed_engine/sources.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/skills.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/src/buildlog/stats.py +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/template/buildlog/.gitkeep +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/template/buildlog/2026-01-01-example.md +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/template/buildlog/_TEMPLATE.md +0 -0
- {buildlog-0.6.1 → buildlog-0.7.0}/template/buildlog/assets/.gitkeep +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: buildlog
|
|
3
|
-
Version: 0.6.1
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Engineering notebook for AI-assisted development
|
|
5
5
|
Project-URL: Homepage, https://github.com/Peleke/buildlog-template
|
|
6
6
|
Project-URL: Repository, https://github.com/Peleke/buildlog-template
|
|
@@ -123,11 +123,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
|
|
|
123
123
|
|
|
124
124
|
## The Mechanism
|
|
125
125
|
|
|
126
|
-
buildlog uses **contextual bandits** to select which rules to surface.
|
|
126
|
+
buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
|
|
127
|
+
|
|
128
|
+
### What Exists Today (v0.7)
|
|
127
129
|
|
|
128
130
|
```
|
|
129
131
|
┌─────────────────────────────────────────────────────────────────┐
|
|
130
|
-
│
|
|
132
|
+
│ CURRENT INFRASTRUCTURE │
|
|
133
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
134
|
+
│ │
|
|
135
|
+
│ ✅ Rule extraction From entries, reviews, curated seeds │
|
|
136
|
+
│ ✅ Confidence scoring Frequency + recency based │
|
|
137
|
+
│ ✅ Reward logging Accept/reject/revision signals │
|
|
138
|
+
│ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
|
|
139
|
+
│ ✅ Review gauntlet Curated persona-based code review │
|
|
140
|
+
│ ⏳ Manual promotion Human selects rules to surface │
|
|
141
|
+
│ │
|
|
142
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### What's Coming (v0.8+)
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
149
|
+
│ CONTEXTUAL BANDIT (PLANNED) │
|
|
131
150
|
├─────────────────────────────────────────────────────────────────┤
|
|
132
151
|
│ │
|
|
133
152
|
│ Context (c): Error class, file type, task category │
|
|
@@ -147,9 +166,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
|
|
|
147
166
|
|
|
148
167
|
**Reward** = did surfacing this rule actually help?
|
|
149
168
|
|
|
150
|
-
The
|
|
169
|
+
The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
|
|
151
170
|
|
|
152
|
-
|
|
171
|
+
We're building in public—the bandit implementation will be developed with full documentation of the process.
|
|
153
172
|
|
|
154
173
|
---
|
|
155
174
|
|
|
@@ -161,16 +180,20 @@ buildlog captures signal at every stage:
|
|
|
161
180
|
flowchart LR
|
|
162
181
|
A["Work Sessions"] --> B["Structured Entries"]
|
|
163
182
|
B --> C["Extracted Rules"]
|
|
164
|
-
C --> D["
|
|
183
|
+
C --> D["Manual Promotion"]
|
|
165
184
|
D --> E["Rule Surfaced"]
|
|
166
185
|
E --> F["Human Feedback"]
|
|
167
|
-
F --> G["
|
|
168
|
-
G
|
|
186
|
+
F --> G["Reward Logged"]
|
|
187
|
+
G -.-> H["Bandit Policy"]
|
|
188
|
+
H -.-> D
|
|
169
189
|
|
|
170
190
|
style F fill:#ff6b6b,color:#fff
|
|
171
191
|
style G fill:#4ecdc4,color:#fff
|
|
192
|
+
style H fill:#666,color:#fff,stroke-dasharray: 5 5
|
|
172
193
|
```
|
|
173
194
|
|
|
195
|
+
*Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
|
|
196
|
+
|
|
174
197
|
### Stage 1: Capture
|
|
175
198
|
Document your work. Include the fuckups—they're the most valuable signal.
|
|
176
199
|
|
|
@@ -269,6 +292,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
|
|
|
269
292
|
buildlog gauntlet learn review_issues.json --source "PR#42"
|
|
270
293
|
```
|
|
271
294
|
|
|
295
|
+
### Gauntlet Loop (Agent Integration)
|
|
296
|
+
|
|
297
|
+
For AI agents, the gauntlet loop automates the fix-rerun cycle:
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
The loop provides structured checkpoints:
|
|
304
|
+
|
|
305
|
+
| Severity | Action | Human Needed? |
|
|
306
|
+
|----------|--------|---------------|
|
|
307
|
+
| **Critical** | Agent fixes, reruns | No |
|
|
308
|
+
| **Major** | Checkpoint: continue? | Yes |
|
|
309
|
+
| **Minor** | Accept risk or fix? | Yes |
|
|
310
|
+
| **Clean** | Done | No |
|
|
311
|
+
|
|
312
|
+
MCP tools for agent integration:
|
|
313
|
+
- `buildlog_gauntlet_issues` — Report findings, get next action
|
|
314
|
+
- `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
|
|
315
|
+
|
|
272
316
|
The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
|
|
273
317
|
|
|
274
318
|
---
|
|
@@ -359,6 +403,8 @@ Available tools:
|
|
|
359
403
|
| `buildlog_start_session` | Begin tracked experiment |
|
|
360
404
|
| `buildlog_log_mistake` | Record mistake during session |
|
|
361
405
|
| `buildlog_experiment_report` | Full experiment report |
|
|
406
|
+
| `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
|
|
407
|
+
| `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
|
|
362
408
|
|
|
363
409
|
### CLI Commands
|
|
364
410
|
|
|
@@ -382,6 +428,7 @@ buildlog gauntlet list # Show reviewers
|
|
|
382
428
|
buildlog gauntlet rules # Export rules
|
|
383
429
|
buildlog gauntlet prompt <path> # Generate review prompt
|
|
384
430
|
buildlog gauntlet learn <file> # Persist learnings
|
|
431
|
+
buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
|
|
385
432
|
```
|
|
386
433
|
|
|
387
434
|
---
|
|
@@ -421,21 +468,28 @@ This is how you know. Not vibes. Data.
|
|
|
421
468
|
|
|
422
469
|
For the technically curious:
|
|
423
470
|
|
|
424
|
-
| Concept | Application in buildlog |
|
|
425
|
-
|
|
426
|
-
| **
|
|
427
|
-
| **
|
|
428
|
-
| **
|
|
429
|
-
| **
|
|
430
|
-
| **
|
|
471
|
+
| Concept | Application in buildlog | Status |
|
|
472
|
+
|---------|------------------------|--------|
|
|
473
|
+
| **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
|
|
474
|
+
| **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
|
|
475
|
+
| **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
|
|
476
|
+
| **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
|
|
477
|
+
| **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
|
|
478
|
+
| **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
|
|
479
|
+
| **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
|
|
431
480
|
|
|
432
|
-
We're not inventing new math. We're applying proven frameworks to a new domain.
|
|
481
|
+
We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
|
|
433
482
|
|
|
434
483
|
---
|
|
435
484
|
|
|
436
485
|
## Honest Limitations
|
|
437
486
|
|
|
438
|
-
|
|
487
|
+
### Not Yet Implemented
|
|
488
|
+
|
|
489
|
+
- **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
|
|
490
|
+
- **Context-aware surfacing**: Rules are surfaced globally, not based on task context
|
|
491
|
+
|
|
492
|
+
### Hard Problems We're Working On
|
|
439
493
|
|
|
440
494
|
- **Credit assignment**: When multiple rules are active, which one helped?
|
|
441
495
|
- **Non-stationarity**: Developer skill changes over time
|
|
@@ -75,11 +75,30 @@ RMR is not the only metric that matters. But it's one we can measure, and measur
|
|
|
75
75
|
|
|
76
76
|
## The Mechanism
|
|
77
77
|
|
|
78
|
-
buildlog uses **contextual bandits** to select which rules to surface.
|
|
78
|
+
buildlog is building toward **contextual bandits** for automatic rule selection. Here's where we are:
|
|
79
|
+
|
|
80
|
+
### What Exists Today (v0.7)
|
|
79
81
|
|
|
80
82
|
```
|
|
81
83
|
┌─────────────────────────────────────────────────────────────────┐
|
|
82
|
-
│
|
|
84
|
+
│ CURRENT INFRASTRUCTURE │
|
|
85
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
86
|
+
│ │
|
|
87
|
+
│ ✅ Rule extraction From entries, reviews, curated seeds │
|
|
88
|
+
│ ✅ Confidence scoring Frequency + recency based │
|
|
89
|
+
│ ✅ Reward logging Accept/reject/revision signals │
|
|
90
|
+
│ ✅ Experiment tracking Sessions, mistakes, RMR calculation │
|
|
91
|
+
│ ✅ Review gauntlet Curated persona-based code review │
|
|
92
|
+
│ ⏳ Manual promotion Human selects rules to surface │
|
|
93
|
+
│ │
|
|
94
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### What's Coming (v0.8+)
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
101
|
+
│ CONTEXTUAL BANDIT (PLANNED) │
|
|
83
102
|
├─────────────────────────────────────────────────────────────────┤
|
|
84
103
|
│ │
|
|
85
104
|
│ Context (c): Error class, file type, task category │
|
|
@@ -99,9 +118,9 @@ buildlog uses **contextual bandits** to select which rules to surface.
|
|
|
99
118
|
|
|
100
119
|
**Reward** = did surfacing this rule actually help?
|
|
101
120
|
|
|
102
|
-
The
|
|
121
|
+
The reward infrastructure exists. The bandit policy is next. Thompson Sampling will provide theoretical guarantees: O(√(KT log K)) regret bounds.
|
|
103
122
|
|
|
104
|
-
|
|
123
|
+
We're building in public—the bandit implementation will be developed with full documentation of the process.
|
|
105
124
|
|
|
106
125
|
---
|
|
107
126
|
|
|
@@ -113,16 +132,20 @@ buildlog captures signal at every stage:
|
|
|
113
132
|
flowchart LR
|
|
114
133
|
A["Work Sessions"] --> B["Structured Entries"]
|
|
115
134
|
B --> C["Extracted Rules"]
|
|
116
|
-
C --> D["
|
|
135
|
+
C --> D["Manual Promotion"]
|
|
117
136
|
D --> E["Rule Surfaced"]
|
|
118
137
|
E --> F["Human Feedback"]
|
|
119
|
-
F --> G["
|
|
120
|
-
G
|
|
138
|
+
F --> G["Reward Logged"]
|
|
139
|
+
G -.-> H["Bandit Policy"]
|
|
140
|
+
H -.-> D
|
|
121
141
|
|
|
122
142
|
style F fill:#ff6b6b,color:#fff
|
|
123
143
|
style G fill:#4ecdc4,color:#fff
|
|
144
|
+
style H fill:#666,color:#fff,stroke-dasharray: 5 5
|
|
124
145
|
```
|
|
125
146
|
|
|
147
|
+
*Dashed: Coming in v0.8 — automatic rule selection via Thompson Sampling*
|
|
148
|
+
|
|
126
149
|
### Stage 1: Capture
|
|
127
150
|
Document your work. Include the fuckups—they're the most valuable signal.
|
|
128
151
|
|
|
@@ -221,6 +244,27 @@ buildlog gauntlet rules --format markdown -o review_checklist.md
|
|
|
221
244
|
buildlog gauntlet learn review_issues.json --source "PR#42"
|
|
222
245
|
```
|
|
223
246
|
|
|
247
|
+
### Gauntlet Loop (Agent Integration)
|
|
248
|
+
|
|
249
|
+
For AI agents, the gauntlet loop automates the fix-rerun cycle:
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
buildlog gauntlet loop src/ --persona security_karen --persona test_terrorist
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
The loop provides structured checkpoints:
|
|
256
|
+
|
|
257
|
+
| Severity | Action | Human Needed? |
|
|
258
|
+
|----------|--------|---------------|
|
|
259
|
+
| **Critical** | Agent fixes, reruns | No |
|
|
260
|
+
| **Major** | Checkpoint: continue? | Yes |
|
|
261
|
+
| **Minor** | Accept risk or fix? | Yes |
|
|
262
|
+
| **Clean** | Done | No |
|
|
263
|
+
|
|
264
|
+
MCP tools for agent integration:
|
|
265
|
+
- `buildlog_gauntlet_issues` — Report findings, get next action
|
|
266
|
+
- `buildlog_gauntlet_accept_risk` — Accept remaining issues (optionally create GitHub issues)
|
|
267
|
+
|
|
224
268
|
The gauntlet integrates with the learning loop—issues found become rules that accumulate confidence.
|
|
225
269
|
|
|
226
270
|
---
|
|
@@ -311,6 +355,8 @@ Available tools:
|
|
|
311
355
|
| `buildlog_start_session` | Begin tracked experiment |
|
|
312
356
|
| `buildlog_log_mistake` | Record mistake during session |
|
|
313
357
|
| `buildlog_experiment_report` | Full experiment report |
|
|
358
|
+
| `buildlog_gauntlet_issues` | Report gauntlet findings, get next action |
|
|
359
|
+
| `buildlog_gauntlet_accept_risk` | Accept remaining issues, optionally create GH issues |
|
|
314
360
|
|
|
315
361
|
### CLI Commands
|
|
316
362
|
|
|
@@ -334,6 +380,7 @@ buildlog gauntlet list # Show reviewers
|
|
|
334
380
|
buildlog gauntlet rules # Export rules
|
|
335
381
|
buildlog gauntlet prompt <path> # Generate review prompt
|
|
336
382
|
buildlog gauntlet learn <file> # Persist learnings
|
|
383
|
+
buildlog gauntlet loop <path> # Auto-fix loop with HITL checkpoints
|
|
337
384
|
```
|
|
338
385
|
|
|
339
386
|
---
|
|
@@ -373,21 +420,28 @@ This is how you know. Not vibes. Data.
|
|
|
373
420
|
|
|
374
421
|
For the technically curious:
|
|
375
422
|
|
|
376
|
-
| Concept | Application in buildlog |
|
|
377
|
-
|
|
378
|
-
| **
|
|
379
|
-
| **
|
|
380
|
-
| **
|
|
381
|
-
| **
|
|
382
|
-
| **
|
|
423
|
+
| Concept | Application in buildlog | Status |
|
|
424
|
+
|---------|------------------------|--------|
|
|
425
|
+
| **Confidence scoring** | Frequency + recency decay | ✅ Implemented |
|
|
426
|
+
| **Semantic hashing** | Mistake deduplication for RMR | ✅ Implemented |
|
|
427
|
+
| **Reward signals** | Binary feedback infrastructure | ✅ Implemented |
|
|
428
|
+
| **Thompson Sampling** | Rule selection under uncertainty | ⏳ Planned (v0.8) |
|
|
429
|
+
| **Beta-Bernoulli model** | Posterior updates from binary reward | ⏳ Planned (v0.8) |
|
|
430
|
+
| **Contextual bandits** | Context-dependent rule selection | ⏳ Planned (v0.8) |
|
|
431
|
+
| **Regret bounds** | O(√(KT log K)) theoretical guarantee | ⏳ Planned (v0.8) |
|
|
383
432
|
|
|
384
|
-
We're not inventing new math. We're applying proven frameworks to a new domain.
|
|
433
|
+
We're not inventing new math. We're applying proven frameworks to a new domain. The infrastructure for reward collection is live; the bandit policy is the next milestone.
|
|
385
434
|
|
|
386
435
|
---
|
|
387
436
|
|
|
388
437
|
## Honest Limitations
|
|
389
438
|
|
|
390
|
-
|
|
439
|
+
### Not Yet Implemented
|
|
440
|
+
|
|
441
|
+
- **Automatic rule selection**: Currently manual promotion; Thompson Sampling bandit planned for v0.8
|
|
442
|
+
- **Context-aware surfacing**: Rules are surfaced globally, not based on task context
|
|
443
|
+
|
|
444
|
+
### Hard Problems We're Working On
|
|
391
445
|
|
|
392
446
|
- **Credit assignment**: When multiple rules are active, which one helped?
|
|
393
447
|
- **Non-stationarity**: Developer skill changes over time
|
|
@@ -1264,5 +1264,158 @@ def gauntlet_learn(issues_file: str, source: str | None, output_json: bool):
|
|
|
1264
1264
|
click.echo(f" Total processed: {result.total_issues_processed}")
|
|
1265
1265
|
|
|
1266
1266
|
|
|
1267
|
+
@gauntlet.command("loop")
|
|
1268
|
+
@click.argument("target", type=click.Path(exists=True))
|
|
1269
|
+
@click.option(
|
|
1270
|
+
"--persona",
|
|
1271
|
+
"-p",
|
|
1272
|
+
multiple=True,
|
|
1273
|
+
help="Personas to run (default: all)",
|
|
1274
|
+
)
|
|
1275
|
+
@click.option(
|
|
1276
|
+
"--max-iterations",
|
|
1277
|
+
"-n",
|
|
1278
|
+
default=10,
|
|
1279
|
+
help="Maximum iterations to prevent infinite loops (default: 10)",
|
|
1280
|
+
)
|
|
1281
|
+
@click.option(
|
|
1282
|
+
"--stop-at",
|
|
1283
|
+
type=click.Choice(["criticals", "majors", "minors"]),
|
|
1284
|
+
default="minors",
|
|
1285
|
+
help="Stop after clearing this severity level (default: minors)",
|
|
1286
|
+
)
|
|
1287
|
+
@click.option(
|
|
1288
|
+
"--auto-gh-issues",
|
|
1289
|
+
is_flag=True,
|
|
1290
|
+
help="Create GitHub issues for remaining items when accepting risk",
|
|
1291
|
+
)
|
|
1292
|
+
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
|
|
1293
|
+
def gauntlet_loop(
|
|
1294
|
+
target: str,
|
|
1295
|
+
persona: tuple[str, ...],
|
|
1296
|
+
max_iterations: int,
|
|
1297
|
+
stop_at: str,
|
|
1298
|
+
auto_gh_issues: bool,
|
|
1299
|
+
output_json: bool,
|
|
1300
|
+
):
|
|
1301
|
+
"""Run the gauntlet loop: review, fix, repeat until clean.
|
|
1302
|
+
|
|
1303
|
+
This command orchestrates the gauntlet loop workflow:
|
|
1304
|
+
|
|
1305
|
+
1. Generate review prompt for target code
|
|
1306
|
+
2. Process issues and determine action
|
|
1307
|
+
3. On criticals: output fix instructions, expect re-run
|
|
1308
|
+
4. On majors only: checkpoint (ask to continue)
|
|
1309
|
+
5. On minors only: checkpoint (accept risk?)
|
|
1310
|
+
6. Optionally create GitHub issues for remaining items
|
|
1311
|
+
|
|
1312
|
+
The loop is designed to be run interactively with an agent
|
|
1313
|
+
(Claude Code, Cursor, etc.) that does the actual fixing.
|
|
1314
|
+
|
|
1315
|
+
Examples:
|
|
1316
|
+
|
|
1317
|
+
buildlog gauntlet loop src/
|
|
1318
|
+
buildlog gauntlet loop tests/ --stop-at majors
|
|
1319
|
+
buildlog gauntlet loop . --auto-gh-issues
|
|
1320
|
+
"""
|
|
1321
|
+
import json as json_module
|
|
1322
|
+
|
|
1323
|
+
from buildlog.seeds import get_default_seeds_dir, load_all_seeds
|
|
1324
|
+
|
|
1325
|
+
# Find seeds directory
|
|
1326
|
+
seeds_dir = get_default_seeds_dir()
|
|
1327
|
+
|
|
1328
|
+
if seeds_dir is None:
|
|
1329
|
+
click.echo("No seed files found.", err=True)
|
|
1330
|
+
raise SystemExit(1)
|
|
1331
|
+
|
|
1332
|
+
seeds = load_all_seeds(seeds_dir)
|
|
1333
|
+
|
|
1334
|
+
if not seeds:
|
|
1335
|
+
click.echo("No seed files found in directory.", err=True)
|
|
1336
|
+
raise SystemExit(1)
|
|
1337
|
+
|
|
1338
|
+
# Filter personas
|
|
1339
|
+
if persona:
|
|
1340
|
+
seeds = {k: v for k, v in seeds.items() if k in persona}
|
|
1341
|
+
if not seeds:
|
|
1342
|
+
click.echo(f"No matching personas: {', '.join(persona)}", err=True)
|
|
1343
|
+
raise SystemExit(1)
|
|
1344
|
+
|
|
1345
|
+
target_path = Path(target)
|
|
1346
|
+
|
|
1347
|
+
# Generate persona rules summary
|
|
1348
|
+
rules_by_persona: dict[str, list[dict[str, str]]] = {}
|
|
1349
|
+
for name, sf in seeds.items():
|
|
1350
|
+
rules_by_persona[name] = [
|
|
1351
|
+
{"rule": r.rule, "antipattern": r.antipattern, "category": r.category}
|
|
1352
|
+
for r in sf.rules
|
|
1353
|
+
]
|
|
1354
|
+
|
|
1355
|
+
# Loop instructions
|
|
1356
|
+
instructions = [
|
|
1357
|
+
"1. Review the target code using the rules from each persona",
|
|
1358
|
+
"2. Report all violations as JSON issues with: severity, category, description, rule_learned, location",
|
|
1359
|
+
"3. Call `buildlog_gauntlet_issues` with the issues list to determine next action",
|
|
1360
|
+
"4. If action='fix_criticals': Fix critical+major issues, then re-run gauntlet",
|
|
1361
|
+
"5. If action='checkpoint_majors': Ask user whether to continue fixing majors",
|
|
1362
|
+
"6. If action='checkpoint_minors': Ask user whether to accept risk or continue",
|
|
1363
|
+
"7. If user accepts risk and --auto-gh-issues: Call `buildlog_gauntlet_accept_risk` with remaining issues",
|
|
1364
|
+
"8. Repeat until action='clean' or max_iterations reached",
|
|
1365
|
+
]
|
|
1366
|
+
|
|
1367
|
+
# Expected issue format
|
|
1368
|
+
issue_format = {
|
|
1369
|
+
"severity": "critical|major|minor|nitpick",
|
|
1370
|
+
"category": "security|testing|architectural|workflow|...",
|
|
1371
|
+
"description": "Concrete description of what's wrong",
|
|
1372
|
+
"rule_learned": "Generalizable rule for the future",
|
|
1373
|
+
"location": "file:line (optional)",
|
|
1374
|
+
}
|
|
1375
|
+
|
|
1376
|
+
# Build the loop output
|
|
1377
|
+
output = {
|
|
1378
|
+
"command": "gauntlet_loop",
|
|
1379
|
+
"target": str(target_path),
|
|
1380
|
+
"personas": list(seeds.keys()),
|
|
1381
|
+
"max_iterations": max_iterations,
|
|
1382
|
+
"stop_at": stop_at,
|
|
1383
|
+
"auto_gh_issues": auto_gh_issues,
|
|
1384
|
+
"rules_by_persona": rules_by_persona,
|
|
1385
|
+
"instructions": instructions,
|
|
1386
|
+
"issue_format": issue_format,
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1389
|
+
if output_json:
|
|
1390
|
+
click.echo(json_module.dumps(output, indent=2))
|
|
1391
|
+
else:
|
|
1392
|
+
# Human-readable output
|
|
1393
|
+
click.echo("=" * 60)
|
|
1394
|
+
click.echo("GAUNTLET LOOP")
|
|
1395
|
+
click.echo("=" * 60)
|
|
1396
|
+
click.echo(f"\nTarget: {target_path}")
|
|
1397
|
+
click.echo(f"Personas: {', '.join(seeds.keys())}")
|
|
1398
|
+
click.echo(f"Max iterations: {max_iterations}")
|
|
1399
|
+
click.echo(f"Stop at: {stop_at}")
|
|
1400
|
+
click.echo(f"Auto GH issues: {auto_gh_issues}")
|
|
1401
|
+
|
|
1402
|
+
click.echo("\n--- RULES ---")
|
|
1403
|
+
for name, rules in rules_by_persona.items():
|
|
1404
|
+
click.echo(f"\n## {name.replace('_', ' ').title()}")
|
|
1405
|
+
for r in rules:
|
|
1406
|
+
click.echo(f" • {r['rule']}")
|
|
1407
|
+
|
|
1408
|
+
click.echo("\n--- LOOP WORKFLOW ---")
|
|
1409
|
+
for instruction in instructions:
|
|
1410
|
+
click.echo(f" {instruction}")
|
|
1411
|
+
|
|
1412
|
+
click.echo("\n--- ISSUE FORMAT ---")
|
|
1413
|
+
click.echo(json_module.dumps(issue_format, indent=2))
|
|
1414
|
+
|
|
1415
|
+
click.echo("\n" + "=" * 60)
|
|
1416
|
+
click.echo("Ready. Run gauntlet review and process issues.")
|
|
1417
|
+
click.echo("=" * 60)
|
|
1418
|
+
|
|
1419
|
+
|
|
1267
1420
|
if __name__ == "__main__":
|
|
1268
1421
|
main()
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
from buildlog.core.operations import (
|
|
4
4
|
DiffResult,
|
|
5
5
|
EndSessionResult,
|
|
6
|
+
GauntletAcceptRiskResult,
|
|
7
|
+
GauntletLoopResult,
|
|
6
8
|
LearnFromReviewResult,
|
|
7
9
|
LogMistakeResult,
|
|
8
10
|
LogRewardResult,
|
|
@@ -20,6 +22,8 @@ from buildlog.core.operations import (
|
|
|
20
22
|
diff,
|
|
21
23
|
end_session,
|
|
22
24
|
find_skills_by_ids,
|
|
25
|
+
gauntlet_accept_risk,
|
|
26
|
+
gauntlet_process_issues,
|
|
23
27
|
get_experiment_report,
|
|
24
28
|
get_rewards,
|
|
25
29
|
get_session_metrics,
|
|
@@ -50,6 +54,9 @@ __all__ = [
|
|
|
50
54
|
"StartSessionResult",
|
|
51
55
|
"EndSessionResult",
|
|
52
56
|
"LogMistakeResult",
|
|
57
|
+
# Gauntlet loop
|
|
58
|
+
"GauntletLoopResult",
|
|
59
|
+
"GauntletAcceptRiskResult",
|
|
53
60
|
"status",
|
|
54
61
|
"promote",
|
|
55
62
|
"reject",
|
|
@@ -64,4 +71,7 @@ __all__ = [
|
|
|
64
71
|
"log_mistake",
|
|
65
72
|
"get_session_metrics",
|
|
66
73
|
"get_experiment_report",
|
|
74
|
+
# Gauntlet loop operations
|
|
75
|
+
"gauntlet_process_issues",
|
|
76
|
+
"gauntlet_accept_risk",
|
|
67
77
|
]
|
|
@@ -35,6 +35,9 @@ __all__ = [
|
|
|
35
35
|
"StartSessionResult",
|
|
36
36
|
"EndSessionResult",
|
|
37
37
|
"LogMistakeResult",
|
|
38
|
+
# Gauntlet loop
|
|
39
|
+
"GauntletLoopResult",
|
|
40
|
+
"GauntletAcceptRiskResult",
|
|
38
41
|
"status",
|
|
39
42
|
"promote",
|
|
40
43
|
"reject",
|
|
@@ -49,6 +52,9 @@ __all__ = [
|
|
|
49
52
|
"log_mistake",
|
|
50
53
|
"get_session_metrics",
|
|
51
54
|
"get_experiment_report",
|
|
55
|
+
# Gauntlet loop operations
|
|
56
|
+
"gauntlet_process_issues",
|
|
57
|
+
"gauntlet_accept_risk",
|
|
52
58
|
]
|
|
53
59
|
|
|
54
60
|
|
|
@@ -1652,3 +1658,231 @@ def get_experiment_report(buildlog_dir: Path) -> dict:
|
|
|
1652
1658
|
"sessions": session_metrics,
|
|
1653
1659
|
"error_classes": error_classes,
|
|
1654
1660
|
}
|
|
1661
|
+
|
|
1662
|
+
|
|
1663
|
+
# =============================================================================
|
|
1664
|
+
# Gauntlet Loop Operations
|
|
1665
|
+
# =============================================================================
|
|
1666
|
+
|
|
1667
|
+
|
|
1668
|
+
@dataclass
|
|
1669
|
+
class GauntletLoopResult:
|
|
1670
|
+
"""Result of processing gauntlet issues.
|
|
1671
|
+
|
|
1672
|
+
Attributes:
|
|
1673
|
+
action: What to do next:
|
|
1674
|
+
- "fix_criticals": Criticals remain, auto-fix and loop
|
|
1675
|
+
- "checkpoint_majors": No criticals, but majors remain (HITL)
|
|
1676
|
+
- "checkpoint_minors": Only minors remain (HITL)
|
|
1677
|
+
- "clean": No issues remain
|
|
1678
|
+
criticals: List of critical severity issues
|
|
1679
|
+
majors: List of major severity issues
|
|
1680
|
+
minors: List of minor/nitpick severity issues
|
|
1681
|
+
iteration: Current iteration number
|
|
1682
|
+
learnings_persisted: Number of learnings persisted this iteration
|
|
1683
|
+
message: Human-readable summary
|
|
1684
|
+
"""
|
|
1685
|
+
|
|
1686
|
+
action: Literal["fix_criticals", "checkpoint_majors", "checkpoint_minors", "clean"]
|
|
1687
|
+
criticals: list[dict]
|
|
1688
|
+
majors: list[dict]
|
|
1689
|
+
minors: list[dict]
|
|
1690
|
+
iteration: int
|
|
1691
|
+
learnings_persisted: int
|
|
1692
|
+
message: str
|
|
1693
|
+
|
|
1694
|
+
|
|
1695
|
+
@dataclass
|
|
1696
|
+
class GauntletAcceptRiskResult:
|
|
1697
|
+
"""Result of accepting risk with remaining issues.
|
|
1698
|
+
|
|
1699
|
+
Attributes:
|
|
1700
|
+
accepted_issues: Number of issues accepted as risk
|
|
1701
|
+
github_issues_created: Number of GitHub issues created (if enabled)
|
|
1702
|
+
github_issue_urls: URLs of created GitHub issues
|
|
1703
|
+
message: Human-readable summary
|
|
1704
|
+
error: Error message if operation failed
|
|
1705
|
+
"""
|
|
1706
|
+
|
|
1707
|
+
accepted_issues: int
|
|
1708
|
+
github_issues_created: int
|
|
1709
|
+
github_issue_urls: list[str]
|
|
1710
|
+
message: str
|
|
1711
|
+
error: str | None = None
|
|
1712
|
+
|
|
1713
|
+
|
|
1714
|
+
def gauntlet_process_issues(
|
|
1715
|
+
buildlog_dir: Path,
|
|
1716
|
+
issues: list[dict],
|
|
1717
|
+
iteration: int = 1,
|
|
1718
|
+
source: str | None = None,
|
|
1719
|
+
) -> GauntletLoopResult:
|
|
1720
|
+
"""Process gauntlet issues and determine next action.
|
|
1721
|
+
|
|
1722
|
+
Categorizes issues by severity, persists learnings, and returns
|
|
1723
|
+
the appropriate next action for the gauntlet loop.
|
|
1724
|
+
|
|
1725
|
+
Args:
|
|
1726
|
+
buildlog_dir: Path to buildlog directory.
|
|
1727
|
+
issues: List of issues from the gauntlet review.
|
|
1728
|
+
iteration: Current iteration number (for tracking).
|
|
1729
|
+
source: Optional source identifier for learnings.
|
|
1730
|
+
|
|
1731
|
+
Returns:
|
|
1732
|
+
GauntletLoopResult with categorized issues and next action.
|
|
1733
|
+
"""
|
|
1734
|
+
# Categorize by severity
|
|
1735
|
+
criticals = [i for i in issues if i.get("severity") == "critical"]
|
|
1736
|
+
majors = [i for i in issues if i.get("severity") == "major"]
|
|
1737
|
+
minors = [i for i in issues if i.get("severity") in ("minor", "nitpick", None)]
|
|
1738
|
+
|
|
1739
|
+
# Persist learnings for this iteration
|
|
1740
|
+
learn_source = source or f"gauntlet:iteration-{iteration}"
|
|
1741
|
+
learn_result = learn_from_review(buildlog_dir, issues, learn_source)
|
|
1742
|
+
learnings_persisted = len(learn_result.new_learnings) + len(
|
|
1743
|
+
learn_result.reinforced_learnings
|
|
1744
|
+
)
|
|
1745
|
+
|
|
1746
|
+
# Determine action
|
|
1747
|
+
if criticals:
|
|
1748
|
+
action: Literal[
|
|
1749
|
+
"fix_criticals", "checkpoint_majors", "checkpoint_minors", "clean"
|
|
1750
|
+
] = "fix_criticals"
|
|
1751
|
+
message = (
|
|
1752
|
+
f"Iteration {iteration}: {len(criticals)} critical, "
|
|
1753
|
+
f"{len(majors)} major, {len(minors)} minor. "
|
|
1754
|
+
f"Fix criticals (and majors) then re-run."
|
|
1755
|
+
)
|
|
1756
|
+
elif majors:
|
|
1757
|
+
action = "checkpoint_majors"
|
|
1758
|
+
message = (
|
|
1759
|
+
f"Iteration {iteration}: No criticals! "
|
|
1760
|
+
f"{len(majors)} major, {len(minors)} minor remain. "
|
|
1761
|
+
f"Continue clearing majors?"
|
|
1762
|
+
)
|
|
1763
|
+
elif minors:
|
|
1764
|
+
action = "checkpoint_minors"
|
|
1765
|
+
message = (
|
|
1766
|
+
f"Iteration {iteration}: Only {len(minors)} minor issues remain. "
|
|
1767
|
+
f"Accept risk or continue?"
|
|
1768
|
+
)
|
|
1769
|
+
else:
|
|
1770
|
+
action = "clean"
|
|
1771
|
+
message = f"Iteration {iteration}: All clear! No issues found."
|
|
1772
|
+
|
|
1773
|
+
return GauntletLoopResult(
|
|
1774
|
+
action=action,
|
|
1775
|
+
criticals=criticals,
|
|
1776
|
+
majors=majors,
|
|
1777
|
+
minors=minors,
|
|
1778
|
+
iteration=iteration,
|
|
1779
|
+
learnings_persisted=learnings_persisted,
|
|
1780
|
+
message=message,
|
|
1781
|
+
)
|
|
1782
|
+
|
|
1783
|
+
|
|
1784
|
+
def gauntlet_accept_risk(
    remaining_issues: list[dict],
    create_github_issues: bool = False,
    repo: str | None = None,
) -> GauntletAcceptRiskResult:
    """Accept risk for remaining issues, optionally creating GitHub issues.

    When ``create_github_issues`` is true, one ``gh issue create`` call is
    made per entry in ``remaining_issues``. A failure for one issue does not
    stop the others; a missing ``gh`` executable stops the loop immediately.

    Args:
        remaining_issues: Issues being accepted as risk. Each dict may carry
            ``severity``, ``rule_learned``, ``description`` and ``location``.
        create_github_issues: Whether to create GitHub issues for tracking.
        repo: Repository passed to ``gh`` via ``--repo``. When falsy the flag
            is omitted and ``gh`` chooses the repository itself.

    Returns:
        GauntletAcceptRiskResult with the number of accepted issues, the URLs
        of the issues that were created, a summary message, and ``error`` set
        when at least one issue could not be created.
    """
    import subprocess

    def _sanitize_for_gh(text: str, max_len: int = 256) -> str:
        """Flatten line breaks and cap ``text`` at ``max_len`` characters."""
        # The command is run as an argument list (no shell), so this is
        # defense-in-depth and keeps titles/bodies to a sane size.
        sanitized = text.replace("\n", " ").replace("\r", " ")
        if len(sanitized) > max_len:
            sanitized = sanitized[: max_len - 3] + "..."
        return sanitized.strip()

    github_urls: list[str] = []
    failures: list[str] = []
    error: str | None = None

    if create_github_issues and remaining_issues:
        for issue in remaining_issues:
            # ``or`` fallbacks also cover keys that are present but empty/None,
            # which would otherwise yield an empty title or a "None" label.
            severity = issue.get("severity") or "minor"
            rule = (
                issue.get("rule_learned")
                or issue.get("description")
                or "Unknown"
            )
            description = issue.get("description") or ""
            location = issue.get("location") or ""

            safe_severity = _sanitize_for_gh(str(severity), 20)
            safe_rule = _sanitize_for_gh(str(rule), 200)
            safe_description = _sanitize_for_gh(str(description), 1000)
            safe_location = _sanitize_for_gh(str(location), 100)

            # Build issue body
            body_parts = [
                f"**Severity:** {safe_severity}",
                f"**Rule:** {safe_rule}",
                "",
                "## Description",
                safe_description,
            ]
            if safe_location:
                body_parts.extend(["", f"**Location:** `{safe_location}`"])
            body_parts.extend(
                [
                    "",
                    "---",
                    "_Created by buildlog gauntlet loop (accepted risk)_",
                ]
            )

            body = "\n".join(body_parts)
            title = f"[Gauntlet/{safe_severity}] {safe_rule[:60]}"

            cmd = ["gh", "issue", "create", "--title", title, "--body", body]
            if safe_severity:
                # Use the sanitized string: the raw value may not be a str,
                # which subprocess would reject with a TypeError.
                cmd.extend(["--label", safe_severity])
            if repo:
                cmd.extend(["--repo", repo])

            try:
                result = subprocess.run(
                    cmd, capture_output=True, text=True, check=True
                )
            except subprocess.CalledProcessError as e:
                # Don't fail entirely; remember the reason and keep going.
                reason = (e.stderr or "").strip() or f"exit code {e.returncode}"
                if reason not in failures:
                    failures.append(reason)
                continue
            except FileNotFoundError:
                error = "gh CLI not found. Install GitHub CLI to create issues."
                break

            # gh issue create outputs the URL
            url = result.stdout.strip()
            if url:
                github_urls.append(url)

    if error is None and failures:
        error = "Failed to create some GitHub issues: " + "; ".join(failures)

    message = f"Accepted {len(remaining_issues)} issues as risk."
    if create_github_issues:
        message += f" Created {len(github_urls)} GitHub issues."

    return GauntletAcceptRiskResult(
        accepted_issues=len(remaining_issues),
        github_issues_created=len(github_urls),
        github_issue_urls=github_urls,
        message=message,
        error=error,
    )
|
|
@@ -8,6 +8,8 @@ from buildlog.mcp.tools import (
|
|
|
8
8
|
buildlog_diff,
|
|
9
9
|
buildlog_end_session,
|
|
10
10
|
buildlog_experiment_report,
|
|
11
|
+
buildlog_gauntlet_accept_risk,
|
|
12
|
+
buildlog_gauntlet_issues,
|
|
11
13
|
buildlog_learn_from_review,
|
|
12
14
|
buildlog_log_mistake,
|
|
13
15
|
buildlog_log_reward,
|
|
@@ -37,6 +39,10 @@ mcp.tool()(buildlog_log_mistake)
|
|
|
37
39
|
mcp.tool()(buildlog_session_metrics)
|
|
38
40
|
mcp.tool()(buildlog_experiment_report)
|
|
39
41
|
|
|
42
|
+
# Gauntlet loop tools
|
|
43
|
+
mcp.tool()(buildlog_gauntlet_issues)
|
|
44
|
+
mcp.tool()(buildlog_gauntlet_accept_risk)
|
|
45
|
+
|
|
40
46
|
|
|
41
47
|
def main() -> None:
|
|
42
48
|
"""Run the MCP server."""
|
|
@@ -405,3 +405,108 @@ def buildlog_experiment_report(
|
|
|
405
405
|
buildlog_experiment_report()
|
|
406
406
|
"""
|
|
407
407
|
return get_experiment_report(Path(buildlog_dir))
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
# -----------------------------------------------------------------------------
|
|
411
|
+
# Gauntlet Loop MCP Tools
|
|
412
|
+
# -----------------------------------------------------------------------------
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def buildlog_gauntlet_issues(
    issues: list[dict],
    iteration: int = 1,
    source: str | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Process gauntlet review issues and determine next action.

    Call this after running a gauntlet review. It categorizes issues by
    severity, persists learnings, and returns the appropriate next action.

    Args:
        issues: List of issues from the gauntlet review, each with:
            {
                "severity": "critical|major|minor|nitpick",
                "category": "security|testing|architectural|...",
                "description": "What's wrong",
                "rule_learned": "Generalizable rule",
                "location": "file:line (optional)"
            }
        iteration: Current iteration number (for tracking loops)
        source: Optional source identifier for learnings
        buildlog_dir: Path to buildlog directory

    Returns:
        Dict with:
            - action: What to do next:
                - "fix_criticals": Criticals remain, auto-fix and loop
                - "checkpoint_majors": No criticals, majors remain (ask user)
                - "checkpoint_minors": Only minors remain (ask user)
                - "clean": No issues remain
            - criticals: List of critical issues
            - majors: List of major issues
            - minors: List of minor/nitpick issues
            - iteration: Current iteration number
            - learnings_persisted: Number of learnings saved
            - message: Human-readable summary

    Example:
        # After running gauntlet review
        result = buildlog_gauntlet_issues(
            issues=[
                {"severity": "critical", "category": "security", ...},
                {"severity": "major", "category": "testing", ...},
            ],
            iteration=1
        )
        # result["action"] tells you what to do next
    """
    from buildlog.core import gauntlet_process_issues

    # The core operation returns a dataclass; flatten it to a plain dict
    # for the tool's return value.
    return asdict(
        gauntlet_process_issues(
            Path(buildlog_dir),
            issues=issues,
            iteration=iteration,
            source=source,
        )
    )
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def buildlog_gauntlet_accept_risk(
    remaining_issues: list[dict],
    create_github_issues: bool = False,
    repo: str | None = None,
) -> dict:
    """Accept risk for remaining issues, optionally creating GitHub issues.

    Call this when the user decides to accept remaining issues as risk
    (e.g., only minors remain and they want to move on).

    Args:
        remaining_issues: Issues being accepted as risk
        create_github_issues: Whether to create GitHub issues for tracking
        repo: Repository for GitHub issues (uses current repo if None)

    Returns:
        Dict with:
            - accepted_issues: Number of issues accepted
            - github_issues_created: Number of GitHub issues created
            - github_issue_urls: URLs of created issues
            - message: Human-readable summary
            - error: Error message if GitHub issue creation failed

    Example:
        # User accepts risk with minors, wants GitHub issues
        result = buildlog_gauntlet_accept_risk(
            remaining_issues=[...],
            create_github_issues=True
        )
    """
    from buildlog.core import gauntlet_accept_risk

    # Delegate to the core operation and flatten its dataclass result
    # to a plain dict.
    return asdict(
        gauntlet_accept_risk(
            remaining_issues=remaining_issues,
            create_github_issues=create_github_issues,
            repo=repo,
        )
    )
|
|
@@ -6,7 +6,7 @@ from datetime import datetime
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import TYPE_CHECKING
|
|
8
8
|
|
|
9
|
-
from buildlog.render.tracking import track_promoted
|
|
9
|
+
from buildlog.render.tracking import get_promoted_ids, track_promoted
|
|
10
10
|
from buildlog.skills import _to_imperative
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
@@ -33,6 +33,8 @@ class ClaudeMdRenderer:
|
|
|
33
33
|
def render(self, skills: list[Skill]) -> str:
|
|
34
34
|
"""Append skills to CLAUDE.md.
|
|
35
35
|
|
|
36
|
+
Filters out skills that have already been promoted to prevent duplicates.
|
|
37
|
+
|
|
36
38
|
Args:
|
|
37
39
|
skills: List of skills to append.
|
|
38
40
|
|
|
@@ -42,9 +44,16 @@ class ClaudeMdRenderer:
|
|
|
42
44
|
if not skills:
|
|
43
45
|
return "No skills to promote"
|
|
44
46
|
|
|
47
|
+
# Filter out already-promoted skills
|
|
48
|
+
already_promoted = get_promoted_ids(self.tracking_path)
|
|
49
|
+
new_skills = [s for s in skills if s.id not in already_promoted]
|
|
50
|
+
|
|
51
|
+
if not new_skills:
|
|
52
|
+
return f"All {len(skills)} skills already promoted"
|
|
53
|
+
|
|
45
54
|
# Group by category
|
|
46
55
|
by_category: dict[str, list[Skill]] = {}
|
|
47
|
-
for skill in
|
|
56
|
+
for skill in new_skills:
|
|
48
57
|
by_category.setdefault(skill.category, []).append(skill)
|
|
49
58
|
|
|
50
59
|
# Build section
|
|
@@ -80,6 +89,10 @@ class ClaudeMdRenderer:
|
|
|
80
89
|
self.path.write_text(content)
|
|
81
90
|
|
|
82
91
|
# Track promoted skill IDs using shared utility
|
|
83
|
-
track_promoted(
|
|
92
|
+
track_promoted(new_skills, self.tracking_path)
|
|
84
93
|
|
|
85
|
-
|
|
94
|
+
skipped = len(skills) - len(new_skills)
|
|
95
|
+
msg = f"Appended {len(new_skills)} rules to {self.path}"
|
|
96
|
+
if skipped > 0:
|
|
97
|
+
msg += f" ({skipped} already promoted, skipped)"
|
|
98
|
+
return msg
|
|
@@ -10,7 +10,26 @@ from typing import TYPE_CHECKING
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from buildlog.skills import Skill
|
|
12
12
|
|
|
13
|
-
__all__ = ["track_promoted"]
|
|
13
|
+
__all__ = ["track_promoted", "get_promoted_ids"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_promoted_ids(tracking_path: Path) -> set[str]:
    """Get the set of already-promoted skill IDs.

    The tracking file is expected to be a JSON object with a ``skill_ids``
    list. Anything else (missing file, invalid JSON, a non-object document,
    a non-list ``skill_ids`` value) is treated as "nothing promoted yet"
    rather than raising.

    Args:
        tracking_path: Path to the tracking JSON file.

    Returns:
        Set of skill IDs that have been promoted; non-string entries in the
        file are ignored.
    """
    try:
        tracking = json.loads(tracking_path.read_text())
    except (FileNotFoundError, NotADirectoryError):
        # Read directly instead of checking exists() first, so a file that
        # disappears between the check and the read cannot raise.
        return set()
    except ValueError:
        # Covers json.JSONDecodeError and undecodable bytes (both ValueError).
        return set()

    if not isinstance(tracking, dict):
        return set()

    skill_ids = tracking.get("skill_ids")
    if not isinstance(skill_ids, list):
        return set()

    return {skill_id for skill_id in skill_ids if isinstance(skill_id, str)}
|
|
14
33
|
|
|
15
34
|
|
|
16
35
|
def track_promoted(skills: list[Skill], tracking_path: Path) -> None:
|
|
@@ -156,6 +156,36 @@ class SeedFile:
|
|
|
156
156
|
)
|
|
157
157
|
|
|
158
158
|
|
|
159
|
+
def _validate_seed_schema(data: dict) -> bool:
|
|
160
|
+
"""Validate seed file has expected schema structure.
|
|
161
|
+
|
|
162
|
+
Defense-in-depth validation for seed files. While yaml.safe_load
|
|
163
|
+
prevents code execution, this ensures data structure matches expectations.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
data: Parsed YAML data.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
True if schema is valid, False otherwise.
|
|
170
|
+
"""
|
|
171
|
+
if not isinstance(data, dict):
|
|
172
|
+
return False
|
|
173
|
+
|
|
174
|
+
# Rules must be a list if present
|
|
175
|
+
rules = data.get("rules", [])
|
|
176
|
+
if not isinstance(rules, list):
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
# Each rule must be a dict with at least a "rule" key
|
|
180
|
+
for rule in rules:
|
|
181
|
+
if not isinstance(rule, dict):
|
|
182
|
+
return False
|
|
183
|
+
if "rule" not in rule:
|
|
184
|
+
return False
|
|
185
|
+
|
|
186
|
+
return True
|
|
187
|
+
|
|
188
|
+
|
|
159
189
|
def load_seed_file(path: Path) -> SeedFile | None:
|
|
160
190
|
"""Load a single seed file from disk.
|
|
161
191
|
|
|
@@ -164,6 +194,10 @@ def load_seed_file(path: Path) -> SeedFile | None:
|
|
|
164
194
|
|
|
165
195
|
Returns:
|
|
166
196
|
Parsed SeedFile or None if loading fails.
|
|
197
|
+
|
|
198
|
+
Note:
|
|
199
|
+
Uses yaml.safe_load which is safe from code execution attacks.
|
|
200
|
+
Additional schema validation ensures data structure is as expected.
|
|
167
201
|
"""
|
|
168
202
|
if not path.exists():
|
|
169
203
|
logger.warning(f"Seed file not found: {path}")
|
|
@@ -171,7 +205,14 @@ def load_seed_file(path: Path) -> SeedFile | None:
|
|
|
171
205
|
|
|
172
206
|
try:
|
|
173
207
|
with open(path) as f:
|
|
208
|
+
# yaml.safe_load is safe - no arbitrary code execution
|
|
174
209
|
data = yaml.safe_load(f)
|
|
210
|
+
|
|
211
|
+
# Validate schema before parsing
|
|
212
|
+
if not _validate_seed_schema(data):
|
|
213
|
+
logger.error(f"Invalid seed file schema: {path}")
|
|
214
|
+
return None
|
|
215
|
+
|
|
175
216
|
return SeedFile.from_dict(data)
|
|
176
217
|
except (yaml.YAMLError, KeyError, TypeError) as e:
|
|
177
218
|
logger.error(f"Failed to parse seed file {path}: {e}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|