buildlog 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +659 -48
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +2 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +284 -24
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +508 -0
  15. buildlog/mcp/server.py +10 -6
  16. buildlog/mcp/tools.py +61 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +67 -32
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/windsurf.py +95 -0
  23. buildlog/seed_engine/__init__.py +2 -0
  24. buildlog/seed_engine/llm_extractor.py +121 -0
  25. buildlog/seed_engine/pipeline.py +45 -1
  26. buildlog/skills.py +69 -6
  27. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/copier.yml +0 -4
  28. buildlog-0.9.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  29. buildlog-0.9.0.dist-info/METADATA +248 -0
  30. buildlog-0.9.0.dist-info/RECORD +55 -0
  31. buildlog-0.7.0.dist-info/METADATA +0 -544
  32. buildlog-0.7.0.dist-info/RECORD +0 -41
  33. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/post_gen.py +0 -0
  34. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  35. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  36. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  37. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  38. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  39. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/WHEEL +0 -0
  40. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/entry_points.txt +0 -0
  41. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,121 @@
1
+ """LLM-backed rule extraction for the seed engine pipeline.
2
+
3
+ Adapts LLMBackend.extract_rules() into the RuleExtractor interface,
4
+ bridging the LLM module with the seed engine's 4-step pipeline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from buildlog.seed_engine.extractors import RuleExtractor
13
+ from buildlog.seed_engine.models import CandidateRule, Source
14
+
15
+ if TYPE_CHECKING:
16
+ from buildlog.llm import LLMBackend
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ _PLACEHOLDER = "Not specified by LLM"
21
+
22
+
23
+ class LLMExtractor(RuleExtractor):
24
+ """LLM-backed rule extraction from source content.
25
+
26
+ Wraps any LLMBackend to produce CandidateRules with full
27
+ defensibility fields. Fields the LLM doesn't populate get
28
+ placeholder values so downstream validation passes.
29
+
30
+ Usage:
31
+ from buildlog.llm import OllamaBackend
32
+ from buildlog.seed_engine.llm_extractor import LLMExtractor
33
+
34
+ backend = OllamaBackend(model="llama3.2")
35
+ extractor = LLMExtractor(backend, source_content={"https://...": "..."})
36
+
37
+ rules = extractor.extract(source)
38
+ """
39
+
40
+ def __init__(
41
+ self,
42
+ backend: LLMBackend,
43
+ source_content: dict[str, str] | None = None,
44
+ ) -> None:
45
+ """Initialize with an LLM backend.
46
+
47
+ Args:
48
+ backend: Any LLMBackend (Ollama, Anthropic, etc.).
49
+ source_content: Optional map of source.url → text content.
50
+ For sources that need pre-fetched content.
51
+ """
52
+ self._backend = backend
53
+ self._source_content = source_content or {}
54
+
55
+ def extract(self, source: Source) -> list[CandidateRule]:
56
+ """Extract candidate rules from a source via LLM.
57
+
58
+ Resolution for content:
59
+ 1. source_content dict (keyed by source.url)
60
+ 2. source.description as fallback
61
+
62
+ Returns empty list on LLM failure (logged, not raised).
63
+ """
64
+ content = self._source_content.get(source.url, "").strip()
65
+ if not content:
66
+ content = source.description.strip()
67
+ if not content:
68
+ logger.warning("No content for source %s, skipping", source.name)
69
+ return []
70
+
71
+ try:
72
+ extracted = self._backend.extract_rules(content)
73
+ except Exception:
74
+ logger.exception("LLM extraction failed for %s", source.name)
75
+ return []
76
+
77
+ candidates: list[CandidateRule] = []
78
+ for er in extracted:
79
+ if not er.rule.strip():
80
+ continue
81
+
82
+ metadata: dict[str, Any] = {
83
+ "extractor": "llm",
84
+ "severity": er.severity,
85
+ "scope": er.scope,
86
+ }
87
+ # Include backend class name (public info only)
88
+ metadata["backend_type"] = type(self._backend).__name__
89
+
90
+ candidates.append(
91
+ CandidateRule(
92
+ rule=er.rule,
93
+ context=er.context or _PLACEHOLDER,
94
+ antipattern=er.antipattern or _PLACEHOLDER,
95
+ rationale=er.rationale or _PLACEHOLDER,
96
+ source=source,
97
+ raw_tags=[er.category] + er.applicability,
98
+ confidence=0.7,
99
+ metadata=metadata,
100
+ )
101
+ )
102
+
103
+ logger.info("LLM extracted %d rules from %s", len(candidates), source.name)
104
+ return candidates
105
+
106
+ def validate(self, rule: CandidateRule) -> list[str]:
107
+ """Validate a candidate rule.
108
+
109
+ Warns on placeholder defensibility fields.
110
+ Requires non-empty rule text.
111
+ """
112
+ issues: list[str] = []
113
+ if not rule.rule.strip():
114
+ issues.append("Rule text is empty")
115
+ if rule.context == _PLACEHOLDER:
116
+ issues.append("Context is LLM placeholder — consider enriching")
117
+ if rule.antipattern == _PLACEHOLDER:
118
+ issues.append("Antipattern is LLM placeholder — consider enriching")
119
+ if rule.rationale == _PLACEHOLDER:
120
+ issues.append("Rationale is LLM placeholder — consider enriching")
121
+ return issues
@@ -12,13 +12,16 @@ from __future__ import annotations
12
12
  import logging
13
13
  from dataclasses import dataclass
14
14
  from pathlib import Path
15
- from typing import Any
15
+ from typing import TYPE_CHECKING, Any
16
16
 
17
17
  from buildlog.seed_engine.categorizers import Categorizer, TagBasedCategorizer
18
18
  from buildlog.seed_engine.extractors import ManualExtractor, RuleExtractor
19
19
  from buildlog.seed_engine.generators import SeedGenerator
20
20
  from buildlog.seed_engine.models import CandidateRule, CategorizedRule, Source
21
21
 
22
+ if TYPE_CHECKING:
23
+ from buildlog.llm import LLMBackend
24
+
22
25
  logger = logging.getLogger(__name__)
23
26
 
24
27
 
@@ -174,6 +177,7 @@ class Pipeline:
174
177
  Returns:
175
178
  List of validation issues (empty if valid).
176
179
  """
180
+ allowed_schemes = {"https", "http", "file"}
177
181
  issues = []
178
182
  for i, source in enumerate(sources):
179
183
  prefix = f"Source {i + 1} ({source.name})"
@@ -181,10 +185,50 @@ class Pipeline:
181
185
  issues.append(f"{prefix}: Missing name")
182
186
  if not source.url.strip():
183
187
  issues.append(f"{prefix}: Missing URL")
188
+ else:
189
+ # Validate URL scheme
190
+ scheme = (
191
+ source.url.split("://")[0].lower() if "://" in source.url else ""
192
+ )
193
+ if scheme not in allowed_schemes:
194
+ issues.append(
195
+ f"{prefix}: URL scheme '{scheme}' not in allowlist {allowed_schemes}"
196
+ )
184
197
  if not source.domain.strip():
185
198
  issues.append(f"{prefix}: Missing domain")
186
199
  return issues
187
200
 
201
+ @classmethod
202
+ def with_llm(
203
+ cls,
204
+ persona: str,
205
+ backend: LLMBackend,
206
+ source_content: dict[str, str] | None = None,
207
+ default_category: str = "general",
208
+ version: int = 1,
209
+ ) -> Pipeline:
210
+ """Convenience constructor wiring LLMExtractor + TagBasedCategorizer.
211
+
212
+ Args:
213
+ persona: Persona name for the seed file.
214
+ backend: Any LLMBackend implementation.
215
+ source_content: Optional pre-fetched content map.
216
+ default_category: Fallback category for uncategorized rules.
217
+ version: Seed file version.
218
+
219
+ Returns:
220
+ Pipeline configured with LLMExtractor.
221
+ """
222
+ from buildlog.seed_engine.llm_extractor import LLMExtractor
223
+
224
+ return cls(
225
+ persona=persona,
226
+ default_category=default_category,
227
+ version=version,
228
+ extractor=LLMExtractor(backend, source_content),
229
+ categorizer=TagBasedCategorizer(default_category=default_category),
230
+ )
231
+
188
232
  def dry_run(self, sources: list[Source]) -> dict[str, Any]:
189
233
  """Run pipeline without writing, returning preview.
190
234
 
buildlog/skills.py CHANGED
@@ -23,7 +23,10 @@ import re
23
23
  from dataclasses import dataclass, field
24
24
  from datetime import date, datetime, timezone
25
25
  from pathlib import Path
26
- from typing import Final, Literal, TypedDict
26
+ from typing import TYPE_CHECKING, Final, Literal, TypedDict
27
+
28
+ if TYPE_CHECKING:
29
+ from buildlog.llm import LLMBackend
27
30
 
28
31
  from buildlog.confidence import ConfidenceConfig, ConfidenceMetrics
29
32
  from buildlog.confidence import calculate_confidence as calculate_continuous_confidence
@@ -83,6 +86,10 @@ class SkillDict(_SkillDictRequired, total=False):
83
86
  antipattern: str # What does violation look like?
84
87
  rationale: str # Why does this matter?
85
88
  persona_tags: list[str] # Which reviewers use this rule?
89
+ # LLM-extracted scoring fields
90
+ severity: str # critical/major/minor/info
91
+ scope: str # global/module/function
92
+ applicability: list[str] # contexts where relevant
86
93
 
87
94
 
88
95
  class SkillSetDict(TypedDict):
@@ -115,6 +122,9 @@ class Skill:
115
122
  antipattern: What does violation look like? (defensibility)
116
123
  rationale: Why does this rule matter? (defensibility)
117
124
  persona_tags: Which reviewer personas use this rule?
125
+ severity: How bad is ignoring this rule? (critical/major/minor/info)
126
+ scope: How broadly does this rule apply? (global/module/function)
127
+ applicability: Contexts where this rule is relevant.
118
128
  """
119
129
 
120
130
  id: str
@@ -131,6 +141,10 @@ class Skill:
131
141
  antipattern: str | None = None
132
142
  rationale: str | None = None
133
143
  persona_tags: list[str] = field(default_factory=list)
144
+ # LLM-extracted scoring
145
+ severity: str | None = None
146
+ scope: str | None = None
147
+ applicability: list[str] = field(default_factory=list)
134
148
 
135
149
  def to_dict(self) -> SkillDict:
136
150
  """Convert to dictionary for serialization.
@@ -159,6 +173,12 @@ class Skill:
159
173
  result["rationale"] = self.rationale
160
174
  if self.persona_tags:
161
175
  result["persona_tags"] = self.persona_tags
176
+ if self.severity is not None:
177
+ result["severity"] = self.severity
178
+ if self.scope is not None:
179
+ result["scope"] = self.scope
180
+ if self.applicability:
181
+ result["applicability"] = self.applicability
162
182
  return result
163
183
 
164
184
 
@@ -326,6 +346,7 @@ def _deduplicate_insights(
326
346
  patterns: list[PatternDict],
327
347
  threshold: float = MIN_SIMILARITY_THRESHOLD,
328
348
  backend: EmbeddingBackend | None = None,
349
+ llm_backend: LLMBackend | None = None,
329
350
  ) -> list[tuple[str, int, list[str], date | None, date | None]]:
330
351
  """Deduplicate similar insights into merged rules.
331
352
 
@@ -366,9 +387,17 @@ def _deduplicate_insights(
366
387
  results: list[tuple[str, int, list[str], date | None, date | None]] = []
367
388
 
368
389
  for group in groups:
369
- # Use the shortest insight as the canonical rule (often cleaner)
370
- canonical = min(group, key=lambda p: len(p["insight"]))
371
- rule = canonical["insight"]
390
+ # Use LLM to select canonical form if available and group has >1 member
391
+ if llm_backend is not None and len(group) > 1:
392
+ try:
393
+ candidates = [p["insight"] for p in group]
394
+ rule = llm_backend.select_canonical(candidates)
395
+ except Exception:
396
+ canonical = min(group, key=lambda p: len(p["insight"]))
397
+ rule = canonical["insight"]
398
+ else:
399
+ canonical = min(group, key=lambda p: len(p["insight"]))
400
+ rule = canonical["insight"]
372
401
  frequency = len(group)
373
402
  sources = sorted(set(p["source"] for p in group))
374
403
 
@@ -434,6 +463,7 @@ def generate_skills(
434
463
  embedding_backend: str | None = None,
435
464
  confidence_config: ConfidenceConfig | None = None,
436
465
  include_review_learnings: bool = True,
466
+ llm: bool = False,
437
467
  ) -> SkillSet:
438
468
  """Generate skills from buildlog patterns and review learnings.
439
469
 
@@ -449,12 +479,21 @@ def generate_skills(
449
479
  include_review_learnings: Whether to include learnings from code reviews.
450
480
  When True, loads .buildlog/review_learnings.json and merges
451
481
  review learnings into the skill set.
482
+ llm: If True and an LLM backend is available, use LLM for extraction,
483
+ canonical selection, and scoring. Falls back gracefully.
452
484
 
453
485
  Returns:
454
486
  SkillSet with generated skills.
455
487
  """
488
+ # Resolve LLM backend if requested
489
+ llm_backend = None
490
+ if llm:
491
+ from buildlog.llm import get_llm_backend
492
+
493
+ llm_backend = get_llm_backend(buildlog_dir=buildlog_dir)
494
+
456
495
  # Get distilled patterns
457
- result = distill_all(buildlog_dir, since=since_date)
496
+ result = distill_all(buildlog_dir, since=since_date, llm=llm)
458
497
 
459
498
  # Get embedding backend
460
499
  backend = (
@@ -471,7 +510,9 @@ def generate_skills(
471
510
 
472
511
  for category in CATEGORIES:
473
512
  patterns = result.patterns.get(category, [])
474
- deduplicated = _deduplicate_insights(patterns, backend=backend)
513
+ deduplicated = _deduplicate_insights(
514
+ patterns, backend=backend, llm_backend=llm_backend
515
+ )
475
516
 
476
517
  skills: list[Skill] = []
477
518
  for rule, frequency, sources, most_recent, earliest in deduplicated:
@@ -490,6 +531,25 @@ def generate_skills(
490
531
  confidence_score, confidence_config
491
532
  ).value
492
533
 
534
+ # LLM scoring for severity/scope/applicability
535
+ severity: str | None = None
536
+ scope: str | None = None
537
+ applicability_tags: list[str] = []
538
+ if llm_backend is not None:
539
+ try:
540
+ scoring = llm_backend.score_rule(rule, category)
541
+ severity = scoring.severity
542
+ scope = scoring.scope
543
+ applicability_tags = scoring.applicability
544
+ except Exception:
545
+ pass # Keep defaults (None/empty)
546
+
547
+ # Apply severity weighting to confidence score
548
+ if confidence_score is not None and severity is not None:
549
+ from buildlog.confidence import apply_severity_weight
550
+
551
+ confidence_score = apply_severity_weight(confidence_score, severity)
552
+
493
553
  skill = Skill(
494
554
  id=_generate_skill_id(category, rule),
495
555
  category=category,
@@ -500,6 +560,9 @@ def generate_skills(
500
560
  tags=_extract_tags(rule),
501
561
  confidence_score=confidence_score,
502
562
  confidence_tier=confidence_tier,
563
+ severity=severity,
564
+ scope=scope,
565
+ applicability=applicability_tags,
503
566
  )
504
567
  skills.append(skill)
505
568
 
@@ -20,10 +20,6 @@ update_claude_md:
20
20
  help: Add buildlog instructions to CLAUDE.md if it exists?
21
21
  default: true
22
22
 
23
- # Post-generation tasks
24
- _tasks:
25
- - "{{ 'python3 post_gen.py' if update_claude_md else 'echo Skipping CLAUDE.md update' }}"
26
-
27
23
  _message_after_copy: |
28
24
  Build journal installed!
29
25
 
@@ -0,0 +1,21 @@
1
+ # Build Journal: [TITLE]
2
+
3
+ **Date:** [YYYY-MM-DD]
4
+ **Duration:** [X hours]
5
+
6
+ ## What I Did
7
+
8
+ [What you built, fixed, or changed. 2-3 sentences.]
9
+
10
+ ## What Went Wrong
11
+
12
+ [Mistakes, surprises, dead ends. Be specific — these become rules.]
13
+
14
+ ## What I Learned
15
+
16
+ ### Improvements
17
+
18
+ - [One thing to do differently next time]
19
+ - [One thing that worked well to repeat]
20
+
21
+ *More sections: see _TEMPLATE.md for the full format.*
@@ -0,0 +1,248 @@
1
+ Metadata-Version: 2.4
2
+ Name: buildlog
3
+ Version: 0.9.0
4
+ Summary: Engineering notebook for AI-assisted development
5
+ Project-URL: Homepage, https://github.com/Peleke/buildlog-template
6
+ Project-URL: Repository, https://github.com/Peleke/buildlog-template
7
+ Author: Peleke Sengstacke
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai,buildlog,development,documentation,journal
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Documentation
21
+ Classifier: Topic :: Software Development :: Documentation
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: click>=8.0.0
24
+ Requires-Dist: copier>=9.0.0
25
+ Requires-Dist: numpy>=1.21.0
26
+ Requires-Dist: pymupdf>=1.26.7
27
+ Requires-Dist: pyyaml>=6.0.0
28
+ Provides-Extra: all
29
+ Requires-Dist: anthropic>=0.40.0; extra == 'all'
30
+ Requires-Dist: mcp>=1.0.0; extra == 'all'
31
+ Requires-Dist: ollama>=0.4.0; extra == 'all'
32
+ Requires-Dist: openai>=1.0.0; extra == 'all'
33
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
34
+ Provides-Extra: anthropic
35
+ Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
36
+ Provides-Extra: dev
37
+ Requires-Dist: black>=24.0.0; extra == 'dev'
38
+ Requires-Dist: flake8>=7.0.0; extra == 'dev'
39
+ Requires-Dist: isort>=5.13.0; extra == 'dev'
40
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
41
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
42
+ Requires-Dist: pre-commit>=3.6.0; extra == 'dev'
43
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
44
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
45
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
46
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
47
+ Provides-Extra: embeddings
48
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'embeddings'
49
+ Provides-Extra: engine
50
+ Provides-Extra: llm
51
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
52
+ Requires-Dist: ollama>=0.4.0; extra == 'llm'
53
+ Provides-Extra: mcp
54
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
55
+ Provides-Extra: ollama
56
+ Requires-Dist: ollama>=0.4.0; extra == 'ollama'
57
+ Provides-Extra: openai
58
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
59
+ Description-Content-Type: text/markdown
60
+
61
+ <div align="center">
62
+
63
+ # buildlog
64
+
65
+ ### A measurable learning loop for AI-assisted work
66
+
67
+ [![PyPI](https://img.shields.io/pypi/v/buildlog?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/buildlog/)
68
+ [![Python](https://img.shields.io/pypi/pyversions/buildlog?style=for-the-badge&logo=python&logoColor=white)](https://python.org/)
69
+ [![CI](https://img.shields.io/github/actions/workflow/status/Peleke/buildlog-template/ci.yml?branch=main&style=for-the-badge&logo=github&label=CI)](https://github.com/Peleke/buildlog-template/actions/workflows/ci.yml)
70
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
71
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-blue?style=for-the-badge&logo=github)](https://peleke.github.io/buildlog-template/)
72
+
73
+ **Track what works. Prove it. Drop what doesn't.**
74
+
75
+ <img src="assets/hero-banner-perfectdeliberate.png" alt="buildlog - A measurable learning loop for AI-assisted work" width="800"/>
76
+
77
+ > **RE: The art.** Yes, it's AI-generated. Yes, that's hypocritical for a project about rigor over vibes. Looking for an actual artist to pay for a real logo. If you know someone good, [open an issue](https://github.com/Peleke/buildlog-template/issues) or DM me. Budget exists.
78
+
79
+ **[Read the full documentation](https://peleke.github.io/buildlog-template/)**
80
+
81
+ </div>
82
+
83
+ ---
84
+
85
+ ## The Problem
86
+
87
+ Most AI agents do not learn. They execute without retaining context. You can bolt on memory stores and tool routers, but if the system cannot demonstrably improve its decision-making over time, you have a persistent memory store, not a learning system.
88
+
89
+ Every AI-assisted work session produces a trajectory: goals, decisions, tool uses, corrections, outcomes. Almost all of this is discarded. The next session starts from scratch with the same blind spots.
90
+
91
+ buildlog exists to close that gap. It captures structured trajectories from real work, extracts decision patterns, and uses statistical methods to select which patterns to surface in future sessions, then measures whether that selection actually reduced mistakes.
92
+
93
+ buildlog measures whether the system actually got better, and proves it.
94
+
95
+ ## How It Works
96
+
97
+ ### 1. Capture structured work trajectories
98
+
99
+ Each session is a dated entry documenting what you did, what went wrong, and what you learned — a structured record of decisions and outcomes, not a chat transcript.
100
+
101
+ ```bash
102
+ buildlog init # scaffold a project
103
+ buildlog new my-feature # start a session
104
+ # ... work ...
105
+ buildlog commit -m "feat: add auth"
106
+ ```
107
+
108
+ ### 2. Extract decision patterns as seeds
109
+
110
+ The seed engine watches your development patterns and extracts **seeds**: atomic observations about what works. A seed might be "always define interfaces before implementations" or "mock at the boundary, not the implementation." Each seed carries a category, a confidence score, and source provenance.
111
+
112
+ Extraction runs through a pipeline: `sources -> extractors -> categorizers -> generators`. Extractors range from regex-based (fast, cheap, brittle) to LLM-backed (accurate, expensive). The pipeline deduplicates semantically using embeddings.
113
+
114
+ ### 3. Select which patterns to surface using Thompson Sampling
115
+
116
+ Seeds compete for inclusion in your agent's instruction set. The system treats each seed as an arm in a contextual bandit and uses **Thompson Sampling** to balance exploration (trying under-tested rules) against exploitation (surfacing rules with strong track records).
117
+
118
+ Each seed maintains a Beta posterior updated by observed outcomes. Over time, the system converges on the rules that actually reduce mistakes in your specific codebase and workflow, not rules that sound good in the abstract.
119
+
120
+ ### 4. Render to every agent format
121
+
122
+ Selected rules are written into the instruction files your agents actually read:
123
+
124
+ - `CLAUDE.md` (Claude Code)
125
+ - `.cursorrules` (Cursor)
126
+ - `.github/copilot-instructions.md` (GitHub Copilot)
127
+ - Windsurf, Continue.dev, generic `settings.json`
128
+
129
+ The same knowledge base renders to every agent format.
130
+
131
+ ```bash
132
+ buildlog skills # render current policy to agent files
133
+ ```
134
+
135
+ ### 5. Close the loop with experiments
136
+
137
+ Track whether the selected rules are working. Run experiments, measure Repeated Mistake Rate (RMR) across sessions, and get statistical evidence, not feelings, about what improved.
138
+
139
+ ```bash
140
+ buildlog experiment start
141
+ # ... work across sessions ...
142
+ buildlog experiment end
143
+ buildlog experiment report
144
+ ```
145
+
146
+ ## What Else Is In the Box
147
+
148
+ - **Review gauntlet:** automated quality gate with curated reviewer personas. Runs on commits (via Claude Code hooks or CI) and files GitHub issues for findings, categorized by severity.
149
+ - **LLM-backed extraction:** when regex isn't enough, the seed engine can use OpenAI, Anthropic, or Ollama to extract patterns from code and logs. Metered backend tracks token usage and cost.
150
+ - **MCP server:** buildlog exposes itself as an MCP server so agents can query seeds, skills, and build history programmatically during sessions.
151
+ - **npm wrapper:** `npx @peleke.s/buildlog` for JS/TS projects. Thin shim that finds and invokes the Python CLI.
152
+
153
+ ## Current Limits
154
+
155
+ This is v0.9, not the end state.
156
+
157
+ - **Extraction quality is uneven.** Regex extractors miss nuance; LLM extractors are accurate but expensive. The middle ground is still being found.
158
+ - **Feedback signals are coarse.** Repeated Mistake Rate works but requires manual tagging. Richer automatic signals (test outcomes, review results, revision distance) are on the roadmap.
159
+ - **Credit assignment is limited.** When multiple rules are active, the system doesn't yet isolate which one was responsible for an outcome.
160
+ - **Single-agent only.** Multi-agent coordination (shared learning across agents) is designed but not implemented.
161
+ - **Long-horizon learning is not modeled.** The bandit operates per-session. Longer arcs of competence building need richer policy models.
162
+
163
+ The roadmap: contextual bandits (now) -> richer policy models -> longer-horizon RL -> multi-agent coordination. Each step builds on the same foundation: measuring whether rule changes actually reduce mistakes.
164
+
165
+ ## Installation
166
+
167
+ ### Global install (recommended)
168
+
169
+ ```bash
170
+ uv tool install "buildlog[mcp]" # or: pipx install "buildlog[mcp]"
171
+ ```
172
+
173
+ This puts `buildlog` and `buildlog-mcp` on your PATH. Works from any directory. The `[mcp]` extra is required for the MCP server.
174
+
175
+ ### Per-project (virtual environment)
176
+
177
+ ```bash
178
+ uv pip install "buildlog[mcp]" # or: pip install "buildlog[mcp]"
179
+ ```
180
+
181
+ Omit `[mcp]` if you only need the CLI.
182
+
183
+ ### For JS/TS projects
184
+
185
+ ```bash
186
+ npx @peleke.s/buildlog init
187
+ ```
188
+
189
+ ### MCP server for Claude Code
190
+
191
+ Add to `~/.claude/claude_code_config.json`:
192
+
193
+ ```json
194
+ {
195
+ "mcpServers": {
196
+ "buildlog": {
197
+ "command": "buildlog-mcp",
198
+ "args": []
199
+ }
200
+ }
201
+ }
202
+ ```
203
+
204
+ This exposes buildlog tools (seeds, skills, experiments, gauntlet, bandit status) to any Claude Code session.
205
+
206
+ ## Quick Start
207
+
208
+ ```bash
209
+ buildlog init # scaffold a project (run in any repo)
210
+ buildlog new my-feature # start a session
211
+ # ... work ...
212
+ buildlog distill && buildlog skills
213
+ buildlog experiment start
214
+ # ... work across sessions ...
215
+ buildlog experiment end
216
+ buildlog experiment report
217
+ ```
218
+
219
+ ## Documentation
220
+
221
+ | Section | Description |
222
+ |---------|------------|
223
+ | [Installation](https://peleke.github.io/buildlog-template/getting-started/installation/) | Setup, extras, and initialization |
224
+ | [Quick Start](https://peleke.github.io/buildlog-template/getting-started/quick-start/) | Full pipeline walkthrough |
225
+ | [Core Concepts](https://peleke.github.io/buildlog-template/getting-started/concepts/) | The problem, the claim, and the metric |
226
+ | [CLI Reference](https://peleke.github.io/buildlog-template/guides/cli-reference/) | Every command documented |
227
+ | [MCP Integration](https://peleke.github.io/buildlog-template/guides/mcp-integration/) | Claude Code setup and available tools |
228
+ | [Experiments](https://peleke.github.io/buildlog-template/guides/experiments/) | Running and measuring experiments |
229
+ | [Review Gauntlet](https://peleke.github.io/buildlog-template/guides/review-gauntlet/) | Reviewer personas and the gauntlet loop |
230
+ | [Multi-Agent Setup](https://peleke.github.io/buildlog-template/guides/multi-agent/) | Render rules to any AI coding agent |
231
+ | [Theory](https://peleke.github.io/buildlog-template/theory/00-background/) | The math behind Thompson Sampling |
232
+ | [Philosophy](https://peleke.github.io/buildlog-template/philosophy/) | Principles and honest limitations |
233
+
234
+ ## Contributing
235
+
236
+ ```bash
237
+ git clone https://github.com/Peleke/buildlog-template
238
+ cd buildlog-template
239
+ uv venv && source .venv/bin/activate
240
+ uv pip install -e ".[dev]"
241
+ pytest
242
+ ```
243
+
244
+ We're especially interested in better context representations, credit assignment approaches, statistical methodology improvements, and real-world experiment results (positive or negative).
245
+
246
+ ## License
247
+
248
+ MIT License. See [LICENSE](./LICENSE).