buildlog 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +659 -48
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +2 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +284 -24
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +508 -0
  15. buildlog/mcp/server.py +10 -6
  16. buildlog/mcp/tools.py +61 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +67 -32
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/windsurf.py +95 -0
  23. buildlog/seed_engine/__init__.py +2 -0
  24. buildlog/seed_engine/llm_extractor.py +121 -0
  25. buildlog/seed_engine/pipeline.py +45 -1
  26. buildlog/skills.py +69 -6
  27. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/copier.yml +0 -4
  28. buildlog-0.9.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  29. buildlog-0.9.0.dist-info/METADATA +248 -0
  30. buildlog-0.9.0.dist-info/RECORD +55 -0
  31. buildlog-0.7.0.dist-info/METADATA +0 -544
  32. buildlog-0.7.0.dist-info/RECORD +0 -41
  33. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/post_gen.py +0 -0
  34. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  35. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  36. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  37. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  38. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  39. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/WHEEL +0 -0
  40. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/entry_points.txt +0 -0
  41. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,121 @@
1
+ """LLM-backed rule extraction for the seed engine pipeline.
2
+
3
+ Adapts LLMBackend.extract_rules() into the RuleExtractor interface,
4
+ bridging the LLM module with the seed engine's 4-step pipeline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from buildlog.seed_engine.extractors import RuleExtractor
13
+ from buildlog.seed_engine.models import CandidateRule, Source
14
+
15
+ if TYPE_CHECKING:
16
+ from buildlog.llm import LLMBackend
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ _PLACEHOLDER = "Not specified by LLM"
21
+
22
+
23
+ class LLMExtractor(RuleExtractor):
24
+ """LLM-backed rule extraction from source content.
25
+
26
+ Wraps any LLMBackend to produce CandidateRules with full
27
+ defensibility fields. Fields the LLM doesn't populate get
28
+ placeholder values so downstream validation passes.
29
+
30
+ Usage:
31
+ from buildlog.llm import OllamaBackend
32
+ from buildlog.seed_engine.llm_extractor import LLMExtractor
33
+
34
+ backend = OllamaBackend(model="llama3.2")
35
+ extractor = LLMExtractor(backend, source_content={"https://...": "..."})
36
+
37
+ rules = extractor.extract(source)
38
+ """
39
+
40
+ def __init__(
41
+ self,
42
+ backend: LLMBackend,
43
+ source_content: dict[str, str] | None = None,
44
+ ) -> None:
45
+ """Initialize with an LLM backend.
46
+
47
+ Args:
48
+ backend: Any LLMBackend (Ollama, Anthropic, etc.).
49
+ source_content: Optional map of source.url → text content.
50
+ For sources that need pre-fetched content.
51
+ """
52
+ self._backend = backend
53
+ self._source_content = source_content or {}
54
+
55
+ def extract(self, source: Source) -> list[CandidateRule]:
56
+ """Extract candidate rules from a source via LLM.
57
+
58
+ Resolution for content:
59
+ 1. source_content dict (keyed by source.url)
60
+ 2. source.description as fallback
61
+
62
+ Returns empty list on LLM failure (logged, not raised).
63
+ """
64
+ content = self._source_content.get(source.url, "").strip()
65
+ if not content:
66
+ content = source.description.strip()
67
+ if not content:
68
+ logger.warning("No content for source %s, skipping", source.name)
69
+ return []
70
+
71
+ try:
72
+ extracted = self._backend.extract_rules(content)
73
+ except Exception:
74
+ logger.exception("LLM extraction failed for %s", source.name)
75
+ return []
76
+
77
+ candidates: list[CandidateRule] = []
78
+ for er in extracted:
79
+ if not er.rule.strip():
80
+ continue
81
+
82
+ metadata: dict[str, Any] = {
83
+ "extractor": "llm",
84
+ "severity": er.severity,
85
+ "scope": er.scope,
86
+ }
87
+ # Include backend class name (public info only)
88
+ metadata["backend_type"] = type(self._backend).__name__
89
+
90
+ candidates.append(
91
+ CandidateRule(
92
+ rule=er.rule,
93
+ context=er.context or _PLACEHOLDER,
94
+ antipattern=er.antipattern or _PLACEHOLDER,
95
+ rationale=er.rationale or _PLACEHOLDER,
96
+ source=source,
97
+ raw_tags=[er.category] + er.applicability,
98
+ confidence=0.7,
99
+ metadata=metadata,
100
+ )
101
+ )
102
+
103
+ logger.info("LLM extracted %d rules from %s", len(candidates), source.name)
104
+ return candidates
105
+
106
+ def validate(self, rule: CandidateRule) -> list[str]:
107
+ """Validate a candidate rule.
108
+
109
+ Warns on placeholder defensibility fields.
110
+ Requires non-empty rule text.
111
+ """
112
+ issues: list[str] = []
113
+ if not rule.rule.strip():
114
+ issues.append("Rule text is empty")
115
+ if rule.context == _PLACEHOLDER:
116
+ issues.append("Context is LLM placeholder — consider enriching")
117
+ if rule.antipattern == _PLACEHOLDER:
118
+ issues.append("Antipattern is LLM placeholder — consider enriching")
119
+ if rule.rationale == _PLACEHOLDER:
120
+ issues.append("Rationale is LLM placeholder — consider enriching")
121
+ return issues
@@ -12,13 +12,16 @@ from __future__ import annotations
12
12
  import logging
13
13
  from dataclasses import dataclass
14
14
  from pathlib import Path
15
- from typing import Any
15
+ from typing import TYPE_CHECKING, Any
16
16
 
17
17
  from buildlog.seed_engine.categorizers import Categorizer, TagBasedCategorizer
18
18
  from buildlog.seed_engine.extractors import ManualExtractor, RuleExtractor
19
19
  from buildlog.seed_engine.generators import SeedGenerator
20
20
  from buildlog.seed_engine.models import CandidateRule, CategorizedRule, Source
21
21
 
22
+ if TYPE_CHECKING:
23
+ from buildlog.llm import LLMBackend
24
+
22
25
  logger = logging.getLogger(__name__)
23
26
 
24
27
 
@@ -174,6 +177,7 @@ class Pipeline:
174
177
  Returns:
175
178
  List of validation issues (empty if valid).
176
179
  """
180
+ allowed_schemes = {"https", "http", "file"}
177
181
  issues = []
178
182
  for i, source in enumerate(sources):
179
183
  prefix = f"Source {i + 1} ({source.name})"
@@ -181,10 +185,50 @@ class Pipeline:
181
185
  issues.append(f"{prefix}: Missing name")
182
186
  if not source.url.strip():
183
187
  issues.append(f"{prefix}: Missing URL")
188
+ else:
189
+ # Validate URL scheme
190
+ scheme = (
191
+ source.url.split("://")[0].lower() if "://" in source.url else ""
192
+ )
193
+ if scheme not in allowed_schemes:
194
+ issues.append(
195
+ f"{prefix}: URL scheme '{scheme}' not in allowlist {allowed_schemes}"
196
+ )
184
197
  if not source.domain.strip():
185
198
  issues.append(f"{prefix}: Missing domain")
186
199
  return issues
187
200
 
201
+ @classmethod
202
+ def with_llm(
203
+ cls,
204
+ persona: str,
205
+ backend: LLMBackend,
206
+ source_content: dict[str, str] | None = None,
207
+ default_category: str = "general",
208
+ version: int = 1,
209
+ ) -> Pipeline:
210
+ """Convenience constructor wiring LLMExtractor + TagBasedCategorizer.
211
+
212
+ Args:
213
+ persona: Persona name for the seed file.
214
+ backend: Any LLMBackend implementation.
215
+ source_content: Optional pre-fetched content map.
216
+ default_category: Fallback category for uncategorized rules.
217
+ version: Seed file version.
218
+
219
+ Returns:
220
+ Pipeline configured with LLMExtractor.
221
+ """
222
+ from buildlog.seed_engine.llm_extractor import LLMExtractor
223
+
224
+ return cls(
225
+ persona=persona,
226
+ default_category=default_category,
227
+ version=version,
228
+ extractor=LLMExtractor(backend, source_content),
229
+ categorizer=TagBasedCategorizer(default_category=default_category),
230
+ )
231
+
188
232
  def dry_run(self, sources: list[Source]) -> dict[str, Any]:
189
233
  """Run pipeline without writing, returning preview.
190
234
 
buildlog/skills.py CHANGED
@@ -23,7 +23,10 @@ import re
23
23
  from dataclasses import dataclass, field
24
24
  from datetime import date, datetime, timezone
25
25
  from pathlib import Path
26
- from typing import Final, Literal, TypedDict
26
+ from typing import TYPE_CHECKING, Final, Literal, TypedDict
27
+
28
+ if TYPE_CHECKING:
29
+ from buildlog.llm import LLMBackend
27
30
 
28
31
  from buildlog.confidence import ConfidenceConfig, ConfidenceMetrics
29
32
  from buildlog.confidence import calculate_confidence as calculate_continuous_confidence
@@ -83,6 +86,10 @@ class SkillDict(_SkillDictRequired, total=False):
83
86
  antipattern: str # What does violation look like?
84
87
  rationale: str # Why does this matter?
85
88
  persona_tags: list[str] # Which reviewers use this rule?
89
+ # LLM-extracted scoring fields
90
+ severity: str # critical/major/minor/info
91
+ scope: str # global/module/function
92
+ applicability: list[str] # contexts where relevant
86
93
 
87
94
 
88
95
  class SkillSetDict(TypedDict):
@@ -115,6 +122,9 @@ class Skill:
115
122
  antipattern: What does violation look like? (defensibility)
116
123
  rationale: Why does this rule matter? (defensibility)
117
124
  persona_tags: Which reviewer personas use this rule?
125
+ severity: How bad is ignoring this rule? (critical/major/minor/info)
126
+ scope: How broadly does this rule apply? (global/module/function)
127
+ applicability: Contexts where this rule is relevant.
118
128
  """
119
129
 
120
130
  id: str
@@ -131,6 +141,10 @@ class Skill:
131
141
  antipattern: str | None = None
132
142
  rationale: str | None = None
133
143
  persona_tags: list[str] = field(default_factory=list)
144
+ # LLM-extracted scoring
145
+ severity: str | None = None
146
+ scope: str | None = None
147
+ applicability: list[str] = field(default_factory=list)
134
148
 
135
149
  def to_dict(self) -> SkillDict:
136
150
  """Convert to dictionary for serialization.
@@ -159,6 +173,12 @@ class Skill:
159
173
  result["rationale"] = self.rationale
160
174
  if self.persona_tags:
161
175
  result["persona_tags"] = self.persona_tags
176
+ if self.severity is not None:
177
+ result["severity"] = self.severity
178
+ if self.scope is not None:
179
+ result["scope"] = self.scope
180
+ if self.applicability:
181
+ result["applicability"] = self.applicability
162
182
  return result
163
183
 
164
184
 
@@ -326,6 +346,7 @@ def _deduplicate_insights(
326
346
  patterns: list[PatternDict],
327
347
  threshold: float = MIN_SIMILARITY_THRESHOLD,
328
348
  backend: EmbeddingBackend | None = None,
349
+ llm_backend: LLMBackend | None = None,
329
350
  ) -> list[tuple[str, int, list[str], date | None, date | None]]:
330
351
  """Deduplicate similar insights into merged rules.
331
352
 
@@ -366,9 +387,17 @@ def _deduplicate_insights(
366
387
  results: list[tuple[str, int, list[str], date | None, date | None]] = []
367
388
 
368
389
  for group in groups:
369
- # Use the shortest insight as the canonical rule (often cleaner)
370
- canonical = min(group, key=lambda p: len(p["insight"]))
371
- rule = canonical["insight"]
390
+ # Use LLM to select canonical form if available and group has >1 member
391
+ if llm_backend is not None and len(group) > 1:
392
+ try:
393
+ candidates = [p["insight"] for p in group]
394
+ rule = llm_backend.select_canonical(candidates)
395
+ except Exception:
396
+ canonical = min(group, key=lambda p: len(p["insight"]))
397
+ rule = canonical["insight"]
398
+ else:
399
+ canonical = min(group, key=lambda p: len(p["insight"]))
400
+ rule = canonical["insight"]
372
401
  frequency = len(group)
373
402
  sources = sorted(set(p["source"] for p in group))
374
403
 
@@ -434,6 +463,7 @@ def generate_skills(
434
463
  embedding_backend: str | None = None,
435
464
  confidence_config: ConfidenceConfig | None = None,
436
465
  include_review_learnings: bool = True,
466
+ llm: bool = False,
437
467
  ) -> SkillSet:
438
468
  """Generate skills from buildlog patterns and review learnings.
439
469
 
@@ -449,12 +479,21 @@ def generate_skills(
449
479
  include_review_learnings: Whether to include learnings from code reviews.
450
480
  When True, loads .buildlog/review_learnings.json and merges
451
481
  review learnings into the skill set.
482
+ llm: If True and an LLM backend is available, use LLM for extraction,
483
+ canonical selection, and scoring. Falls back gracefully.
452
484
 
453
485
  Returns:
454
486
  SkillSet with generated skills.
455
487
  """
488
+ # Resolve LLM backend if requested
489
+ llm_backend = None
490
+ if llm:
491
+ from buildlog.llm import get_llm_backend
492
+
493
+ llm_backend = get_llm_backend(buildlog_dir=buildlog_dir)
494
+
456
495
  # Get distilled patterns
457
- result = distill_all(buildlog_dir, since=since_date)
496
+ result = distill_all(buildlog_dir, since=since_date, llm=llm)
458
497
 
459
498
  # Get embedding backend
460
499
  backend = (
@@ -471,7 +510,9 @@ def generate_skills(
471
510
 
472
511
  for category in CATEGORIES:
473
512
  patterns = result.patterns.get(category, [])
474
- deduplicated = _deduplicate_insights(patterns, backend=backend)
513
+ deduplicated = _deduplicate_insights(
514
+ patterns, backend=backend, llm_backend=llm_backend
515
+ )
475
516
 
476
517
  skills: list[Skill] = []
477
518
  for rule, frequency, sources, most_recent, earliest in deduplicated:
@@ -490,6 +531,25 @@ def generate_skills(
490
531
  confidence_score, confidence_config
491
532
  ).value
492
533
 
534
+ # LLM scoring for severity/scope/applicability
535
+ severity: str | None = None
536
+ scope: str | None = None
537
+ applicability_tags: list[str] = []
538
+ if llm_backend is not None:
539
+ try:
540
+ scoring = llm_backend.score_rule(rule, category)
541
+ severity = scoring.severity
542
+ scope = scoring.scope
543
+ applicability_tags = scoring.applicability
544
+ except Exception:
545
+ pass # Keep defaults (None/empty)
546
+
547
+ # Apply severity weighting to confidence score
548
+ if confidence_score is not None and severity is not None:
549
+ from buildlog.confidence import apply_severity_weight
550
+
551
+ confidence_score = apply_severity_weight(confidence_score, severity)
552
+
493
553
  skill = Skill(
494
554
  id=_generate_skill_id(category, rule),
495
555
  category=category,
@@ -500,6 +560,9 @@ def generate_skills(
500
560
  tags=_extract_tags(rule),
501
561
  confidence_score=confidence_score,
502
562
  confidence_tier=confidence_tier,
563
+ severity=severity,
564
+ scope=scope,
565
+ applicability=applicability_tags,
503
566
  )
504
567
  skills.append(skill)
505
568
 
@@ -20,10 +20,6 @@ update_claude_md:
20
20
  help: Add buildlog instructions to CLAUDE.md if it exists?
21
21
  default: true
22
22
 
23
- # Post-generation tasks
24
- _tasks:
25
- - "{{ 'python3 post_gen.py' if update_claude_md else 'echo Skipping CLAUDE.md update' }}"
26
-
27
23
  _message_after_copy: |
28
24
  Build journal installed!
29
25
 
@@ -0,0 +1,21 @@
1
+ # Build Journal: [TITLE]
2
+
3
+ **Date:** [YYYY-MM-DD]
4
+ **Duration:** [X hours]
5
+
6
+ ## What I Did
7
+
8
+ [What you built, fixed, or changed. 2-3 sentences.]
9
+
10
+ ## What Went Wrong
11
+
12
+ [Mistakes, surprises, dead ends. Be specific — these become rules.]
13
+
14
+ ## What I Learned
15
+
16
+ ### Improvements
17
+
18
+ - [One thing to do differently next time]
19
+ - [One thing that worked well to repeat]
20
+
21
+ *More sections: see _TEMPLATE.md for the full format.*
@@ -0,0 +1,248 @@
1
+ Metadata-Version: 2.4
2
+ Name: buildlog
3
+ Version: 0.9.0
4
+ Summary: Engineering notebook for AI-assisted development
5
+ Project-URL: Homepage, https://github.com/Peleke/buildlog-template
6
+ Project-URL: Repository, https://github.com/Peleke/buildlog-template
7
+ Author: Peleke Sengstacke
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: ai,buildlog,development,documentation,journal
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Documentation
21
+ Classifier: Topic :: Software Development :: Documentation
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: click>=8.0.0
24
+ Requires-Dist: copier>=9.0.0
25
+ Requires-Dist: numpy>=1.21.0
26
+ Requires-Dist: pymupdf>=1.26.7
27
+ Requires-Dist: pyyaml>=6.0.0
28
+ Provides-Extra: all
29
+ Requires-Dist: anthropic>=0.40.0; extra == 'all'
30
+ Requires-Dist: mcp>=1.0.0; extra == 'all'
31
+ Requires-Dist: ollama>=0.4.0; extra == 'all'
32
+ Requires-Dist: openai>=1.0.0; extra == 'all'
33
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'all'
34
+ Provides-Extra: anthropic
35
+ Requires-Dist: anthropic>=0.40.0; extra == 'anthropic'
36
+ Provides-Extra: dev
37
+ Requires-Dist: black>=24.0.0; extra == 'dev'
38
+ Requires-Dist: flake8>=7.0.0; extra == 'dev'
39
+ Requires-Dist: isort>=5.13.0; extra == 'dev'
40
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'dev'
41
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
42
+ Requires-Dist: pre-commit>=3.6.0; extra == 'dev'
43
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
44
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
45
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
46
+ Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
47
+ Provides-Extra: embeddings
48
+ Requires-Dist: sentence-transformers>=2.2.0; extra == 'embeddings'
49
+ Provides-Extra: engine
50
+ Provides-Extra: llm
51
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
52
+ Requires-Dist: ollama>=0.4.0; extra == 'llm'
53
+ Provides-Extra: mcp
54
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
55
+ Provides-Extra: ollama
56
+ Requires-Dist: ollama>=0.4.0; extra == 'ollama'
57
+ Provides-Extra: openai
58
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
59
+ Description-Content-Type: text/markdown
60
+
61
+ <div align="center">
62
+
63
+ # buildlog
64
+
65
+ ### A measurable learning loop for AI-assisted work
66
+
67
+ [![PyPI](https://img.shields.io/pypi/v/buildlog?style=for-the-badge&logo=pypi&logoColor=white)](https://pypi.org/project/buildlog/)
68
+ [![Python](https://img.shields.io/pypi/pyversions/buildlog?style=for-the-badge&logo=python&logoColor=white)](https://python.org/)
69
+ [![CI](https://img.shields.io/github/actions/workflow/status/Peleke/buildlog-template/ci.yml?branch=main&style=for-the-badge&logo=github&label=CI)](https://github.com/Peleke/buildlog-template/actions/workflows/ci.yml)
70
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
71
+ [![Docs](https://img.shields.io/badge/docs-GitHub%20Pages-blue?style=for-the-badge&logo=github)](https://peleke.github.io/buildlog-template/)
72
+
73
+ **Track what works. Prove it. Drop what doesn't.**
74
+
75
+ <img src="assets/hero-banner-perfectdeliberate.png" alt="buildlog - A measurable learning loop for AI-assisted work" width="800"/>
76
+
77
+ > **RE: The art.** Yes, it's AI-generated. Yes, that's hypocritical for a project about rigor over vibes. Looking for an actual artist to pay for a real logo. If you know someone good, [open an issue](https://github.com/Peleke/buildlog-template/issues) or DM me. Budget exists.
78
+
79
+ **[Read the full documentation](https://peleke.github.io/buildlog-template/)**
80
+
81
+ </div>
82
+
83
+ ---
84
+
85
+ ## The Problem
86
+
87
+ Most AI agents do not learn. They execute without retaining context. You can bolt on memory stores and tool routers, but if the system cannot demonstrably improve its decision-making over time, you have a persistent memory store, not a learning system.
88
+
89
+ Every AI-assisted work session produces a trajectory: goals, decisions, tool uses, corrections, outcomes. Almost all of this is discarded. The next session starts from scratch with the same blind spots.
90
+
91
+ buildlog exists to close that gap. It captures structured trajectories from real work, extracts decision patterns, and uses statistical methods to select which patterns to surface in future sessions, then measures whether that selection actually reduced mistakes.
92
+
93
+ buildlog measures whether the system actually got better, and proves it.
94
+
95
+ ## How It Works
96
+
97
+ ### 1. Capture structured work trajectories
98
+
99
+ Each session is a dated entry documenting what you did, what went wrong, and what you learned — a structured record of decisions and outcomes, not a chat transcript.
100
+
101
+ ```bash
102
+ buildlog init # scaffold a project
103
+ buildlog new my-feature # start a session
104
+ # ... work ...
105
+ buildlog commit -m "feat: add auth"
106
+ ```
107
+
108
+ ### 2. Extract decision patterns as seeds
109
+
110
+ The seed engine watches your development patterns and extracts **seeds**: atomic observations about what works. A seed might be "always define interfaces before implementations" or "mock at the boundary, not the implementation." Each seed carries a category, a confidence score, and source provenance.
111
+
112
+ Extraction runs through a pipeline: `sources -> extractors -> categorizers -> generators`. Extractors range from regex-based (fast, cheap, brittle) to LLM-backed (accurate, expensive). The pipeline deduplicates semantically using embeddings.
113
+
114
+ ### 3. Select which patterns to surface using Thompson Sampling
115
+
116
+ Seeds compete for inclusion in your agent's instruction set. The system treats each seed as an arm in a contextual bandit and uses **Thompson Sampling** to balance exploration (trying under-tested rules) against exploitation (surfacing rules with strong track records).
117
+
118
+ Each seed maintains a Beta posterior updated by observed outcomes. Over time, the system converges on the rules that actually reduce mistakes in your specific codebase and workflow, not rules that sound good in the abstract.
119
+
120
+ ### 4. Render to every agent format
121
+
122
+ Selected rules are written into the instruction files your agents actually read:
123
+
124
+ - `CLAUDE.md` (Claude Code)
125
+ - `.cursorrules` (Cursor)
126
+ - `.github/copilot-instructions.md` (GitHub Copilot)
127
+ - Windsurf, Continue.dev, generic `settings.json`
128
+
129
+ The same knowledge base renders to every agent format.
130
+
131
+ ```bash
132
+ buildlog skills # render current policy to agent files
133
+ ```
134
+
135
+ ### 5. Close the loop with experiments
136
+
137
+ Track whether the selected rules are working. Run experiments, measure Repeated Mistake Rate (RMR) across sessions, and get statistical evidence, not feelings, about what improved.
138
+
139
+ ```bash
140
+ buildlog experiment start
141
+ # ... work across sessions ...
142
+ buildlog experiment end
143
+ buildlog experiment report
144
+ ```
145
+
146
+ ## What Else Is In the Box
147
+
148
+ - **Review gauntlet:** automated quality gate with curated reviewer personas. Runs on commits (via Claude Code hooks or CI) and files GitHub issues for findings, categorized by severity.
149
+ - **LLM-backed extraction:** when regex isn't enough, the seed engine can use OpenAI, Anthropic, or Ollama to extract patterns from code and logs. Metered backend tracks token usage and cost.
150
+ - **MCP server:** buildlog exposes itself as an MCP server so agents can query seeds, skills, and build history programmatically during sessions.
151
+ - **npm wrapper:** `npx @peleke.s/buildlog` for JS/TS projects. Thin shim that finds and invokes the Python CLI.
152
+
153
+ ## Current Limits
154
+
155
+ This is v0.9, not the end state.
156
+
157
+ - **Extraction quality is uneven.** Regex extractors miss nuance; LLM extractors are accurate but expensive. The middle ground is still being found.
158
+ - **Feedback signals are coarse.** Repeated Mistake Rate works but requires manual tagging. Richer automatic signals (test outcomes, review results, revision distance) are on the roadmap.
159
+ - **Credit assignment is limited.** When multiple rules are active, the system doesn't yet isolate which one was responsible for an outcome.
160
+ - **Single-agent only.** Multi-agent coordination (shared learning across agents) is designed but not implemented.
161
+ - **Long-horizon learning is not modeled.** The bandit operates per-session. Longer arcs of competence building need richer policy models.
162
+
163
+ The roadmap: contextual bandits (now) -> richer policy models -> longer-horizon RL -> multi-agent coordination. Each step builds on the same foundation: measuring whether rule changes actually reduce mistakes.
164
+
165
+ ## Installation
166
+
167
+ ### Global install (recommended)
168
+
169
+ ```bash
170
+ uv tool install "buildlog[mcp]" # or: pipx install "buildlog[mcp]"
171
+ ```
172
+
173
+ This puts `buildlog` and `buildlog-mcp` on your PATH. Works from any directory. The `[mcp]` extra is required for the MCP server.
174
+
175
+ ### Per-project (virtual environment)
176
+
177
+ ```bash
178
+ uv pip install "buildlog[mcp]" # or: pip install "buildlog[mcp]"
179
+ ```
180
+
181
+ Omit `[mcp]` if you only need the CLI.
182
+
183
+ ### For JS/TS projects
184
+
185
+ ```bash
186
+ npx @peleke.s/buildlog init
187
+ ```
188
+
189
+ ### MCP server for Claude Code
190
+
191
+ Add to `~/.claude/claude_code_config.json`:
192
+
193
+ ```json
194
+ {
195
+ "mcpServers": {
196
+ "buildlog": {
197
+ "command": "buildlog-mcp",
198
+ "args": []
199
+ }
200
+ }
201
+ }
202
+ ```
203
+
204
+ This exposes buildlog tools (seeds, skills, experiments, gauntlet, bandit status) to any Claude Code session.
205
+
206
+ ## Quick Start
207
+
208
+ ```bash
209
+ buildlog init # scaffold a project (run in any repo)
210
+ buildlog new my-feature # start a session
211
+ # ... work ...
212
+ buildlog distill && buildlog skills
213
+ buildlog experiment start
214
+ # ... work across sessions ...
215
+ buildlog experiment end
216
+ buildlog experiment report
217
+ ```
218
+
219
+ ## Documentation
220
+
221
+ | Section | Description |
222
+ |---------|------------|
223
+ | [Installation](https://peleke.github.io/buildlog-template/getting-started/installation/) | Setup, extras, and initialization |
224
+ | [Quick Start](https://peleke.github.io/buildlog-template/getting-started/quick-start/) | Full pipeline walkthrough |
225
+ | [Core Concepts](https://peleke.github.io/buildlog-template/getting-started/concepts/) | The problem, the claim, and the metric |
226
+ | [CLI Reference](https://peleke.github.io/buildlog-template/guides/cli-reference/) | Every command documented |
227
+ | [MCP Integration](https://peleke.github.io/buildlog-template/guides/mcp-integration/) | Claude Code setup and available tools |
228
+ | [Experiments](https://peleke.github.io/buildlog-template/guides/experiments/) | Running and measuring experiments |
229
+ | [Review Gauntlet](https://peleke.github.io/buildlog-template/guides/review-gauntlet/) | Reviewer personas and the gauntlet loop |
230
+ | [Multi-Agent Setup](https://peleke.github.io/buildlog-template/guides/multi-agent/) | Render rules to any AI coding agent |
231
+ | [Theory](https://peleke.github.io/buildlog-template/theory/00-background/) | The math behind Thompson Sampling |
232
+ | [Philosophy](https://peleke.github.io/buildlog-template/philosophy/) | Principles and honest limitations |
233
+
234
+ ## Contributing
235
+
236
+ ```bash
237
+ git clone https://github.com/Peleke/buildlog-template
238
+ cd buildlog-template
239
+ uv venv && source .venv/bin/activate
240
+ uv pip install -e ".[dev]"
241
+ pytest
242
+ ```
243
+
244
+ We're especially interested in better context representations, credit assignment approaches, statistical methodology improvements, and real-world experiment results (positive or negative).
245
+
246
+ ## License
247
+
248
+ MIT License. See [LICENSE](./LICENSE).