PyPI - buildlog - Versions diffs - 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

buildlog 0.4.0py3-none-any.whl → 0.6.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

buildlog/cli.py +799 -3
buildlog/core/__init__.py +34 -0
buildlog/core/operations.py +925 -0
buildlog/mcp/server.py +16 -0
buildlog/mcp/tools.py +266 -1
buildlog/seed_engine/__init__.py +74 -0
buildlog/seed_engine/categorizers.py +145 -0
buildlog/seed_engine/extractors.py +148 -0
buildlog/seed_engine/generators.py +144 -0
buildlog/seed_engine/models.py +113 -0
buildlog/seed_engine/pipeline.py +202 -0
buildlog/seed_engine/sources.py +362 -0
buildlog/seeds.py +211 -0
buildlog/skills.py +26 -3
buildlog-0.6.0.dist-info/METADATA +490 -0
buildlog-0.6.0.dist-info/RECORD +38 -0
buildlog-0.4.0.dist-info/METADATA +0 -894
buildlog-0.4.0.dist-info/RECORD +0 -30
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/copier.yml +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/post_gen.py +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
{buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
{buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/WHEEL +0 -0
{buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/entry_points.txt +0 -0
{buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/licenses/LICENSE +0 -0

buildlog/mcp/server.py CHANGED Viewed

@@ -6,9 +6,16 @@ from mcp.server.fastmcp import FastMCP
 from buildlog.mcp.tools import (
     buildlog_diff,
+    buildlog_end_session,
+    buildlog_experiment_report,
     buildlog_learn_from_review,
+    buildlog_log_mistake,
+    buildlog_log_reward,
     buildlog_promote,
     buildlog_reject,
+    buildlog_rewards,
+    buildlog_session_metrics,
+    buildlog_start_session,
     buildlog_status,
 )
@@ -20,6 +27,15 @@ mcp.tool()(buildlog_promote)
 mcp.tool()(buildlog_reject)
 mcp.tool()(buildlog_diff)
 mcp.tool()(buildlog_learn_from_review)
+# Reward tools (feedback signals for bandit learning)
+mcp.tool()(buildlog_log_reward)
+mcp.tool()(buildlog_rewards)
+# Session tracking tools (experiment infrastructure)
+mcp.tool()(buildlog_start_session)
+mcp.tool()(buildlog_end_session)
+mcp.tool()(buildlog_log_mistake)
+mcp.tool()(buildlog_session_metrics)
+mcp.tool()(buildlog_experiment_report)
 def main() -> None:

buildlog/mcp/tools.py CHANGED Viewed

@@ -9,7 +9,20 @@ from dataclasses import asdict
 from pathlib import Path
 from typing import Literal
-from buildlog.core import diff, learn_from_review, promote, reject, status
+from buildlog.core import (
+    diff,
+    end_session,
+    get_experiment_report,
+    get_rewards,
+    get_session_metrics,
+    learn_from_review,
+    log_mistake,
+    log_reward,
+    promote,
+    reject,
+    start_session,
+    status,
+)
 def _validate_skill_ids(skill_ids: list[str]) -> list[str]:
@@ -140,3 +153,255 @@ def buildlog_learn_from_review(
     """
     result = learn_from_review(Path(buildlog_dir), issues, source)
     return asdict(result)
+def buildlog_log_reward(
+    outcome: str,
+    rules_active: list[str] | None = None,
+    revision_distance: float | None = None,
+    error_class: str | None = None,
+    notes: str | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Log a reward signal for bandit learning.
+    Call this after agent work to provide feedback on the outcome.
+    This enables learning which rules are effective in which contexts.
+    Args:
+        outcome: Type of feedback:
+            - "accepted": Work was accepted as-is (reward=1.0)
+            - "revision": Work needed changes (reward=1-distance)
+            - "rejected": Work was rejected entirely (reward=0.0)
+        rules_active: List of rule IDs that were in context during the work
+        revision_distance: How much correction was needed (0-1, 0=minor tweak, 1=complete redo)
+        error_class: Category of error if applicable (e.g., "missing_test", "validation_boundary")
+        notes: Optional notes about the feedback
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with reward_id, reward_value, total_events
+    Example:
+        # Work was accepted
+        buildlog_log_reward(outcome="accepted", rules_active=["arch-123", "wf-456"])
+        # Work needed revision
+        buildlog_log_reward(
+            outcome="revision",
+            revision_distance=0.3,
+            error_class="missing_test",
+            notes="Forgot to test error path"
+        )
+        # Work was rejected
+        buildlog_log_reward(outcome="rejected", notes="Completely wrong approach")
+    """
+    # Validate outcome
+    if outcome not in ("accepted", "revision", "rejected"):
+        return {
+            "reward_id": "",
+            "reward_value": 0.0,
+            "total_events": 0,
+            "message": "",
+            "error": f"Invalid outcome: {outcome}. Must be 'accepted', 'revision', or 'rejected'",
+        }
+    result = log_reward(
+        Path(buildlog_dir),
+        outcome=outcome,  # type: ignore[arg-type]
+        rules_active=rules_active,
+        revision_distance=revision_distance,
+        error_class=error_class,
+        notes=notes,
+        source="mcp",
+    )
+    return asdict(result)
+def buildlog_rewards(
+    limit: int | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Get reward events with summary statistics.
+    Returns recent reward events and aggregate statistics useful for
+    understanding learning progress.
+    Args:
+        limit: Maximum number of events to return (most recent first)
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with:
+            - total_events: Total count of reward events
+            - accepted: Count of accepted outcomes
+            - revisions: Count of revision outcomes
+            - rejected: Count of rejected outcomes
+            - mean_reward: Average reward value
+            - events: List of recent events (limited)
+    Example:
+        buildlog_rewards(limit=10)  # Get 10 most recent events with stats
+    """
+    result = get_rewards(Path(buildlog_dir), limit)
+    # Convert events to dicts
+    return {
+        "total_events": result.total_events,
+        "accepted": result.accepted,
+        "revisions": result.revisions,
+        "rejected": result.rejected,
+        "mean_reward": result.mean_reward,
+        "events": [e.to_dict() for e in result.events],
+    }
+# -----------------------------------------------------------------------------
+# Session Tracking MCP Tools (Experiment Infrastructure)
+# -----------------------------------------------------------------------------
+def buildlog_start_session(
+    error_class: str | None = None,
+    notes: str | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Start a new experiment session.
+    Begins tracking for a learning experiment. Captures the current
+    set of active rules to measure learning over time.
+    Args:
+        error_class: Error class being targeted (e.g., "missing_test")
+        notes: Notes about this session
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with session_id, error_class, rules_count, message
+    Example:
+        buildlog_start_session(error_class="missing_test")
+    """
+    result = start_session(
+        Path(buildlog_dir),
+        error_class=error_class,
+        notes=notes,
+    )
+    return asdict(result)
+def buildlog_end_session(
+    entry_file: str | None = None,
+    notes: str | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """End the current experiment session.
+    Finalizes the session and calculates metrics including:
+    - Total mistakes logged
+    - Repeated mistakes (from prior sessions)
+    - Rules added during session
+    Args:
+        entry_file: Corresponding buildlog entry file, if any
+        notes: Additional notes to append
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with session_id, duration_minutes, mistakes_logged,
+        repeated_mistakes, rules_at_start, rules_at_end, message
+    Example:
+        buildlog_end_session(entry_file="2026-01-21.md")
+    """
+    result = end_session(
+        Path(buildlog_dir),
+        entry_file=entry_file,
+        notes=notes,
+    )
+    return asdict(result)
+def buildlog_log_mistake(
+    error_class: str,
+    description: str,
+    corrected_by_rule: str | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Log a mistake during the current session.
+    Records the mistake and checks if it's a repeat of a prior mistake
+    (from earlier sessions). This enables measuring repeated-mistake rates.
+    Args:
+        error_class: Category of error (e.g., "missing_test")
+        description: Description of the mistake
+        corrected_by_rule: Rule ID that should have prevented this
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with mistake_id, session_id, was_repeat, similar_prior, message
+    Example:
+        buildlog_log_mistake(
+            error_class="missing_test",
+            description="Forgot to add unit tests for new helper function"
+        )
+    """
+    result = log_mistake(
+        Path(buildlog_dir),
+        error_class=error_class,
+        description=description,
+        corrected_by_rule=corrected_by_rule,
+    )
+    return asdict(result)
+def buildlog_session_metrics(
+    session_id: str | None = None,
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Get metrics for a session or all sessions.
+    Returns mistake rates and rule changes for analysis.
+    Args:
+        session_id: Specific session ID, or None for aggregate metrics
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with session_id, total_mistakes, repeated_mistakes,
+        repeated_mistake_rate, rules_at_start, rules_at_end, rules_added
+    Example:
+        buildlog_session_metrics()  # Aggregate metrics
+        buildlog_session_metrics(session_id="session-20260121-140000")
+    """
+    result = get_session_metrics(
+        Path(buildlog_dir),
+        session_id=session_id,
+    )
+    return asdict(result)
+def buildlog_experiment_report(
+    buildlog_dir: str = "buildlog",
+) -> dict:
+    """Generate a comprehensive experiment report.
+    Returns summary statistics, per-session breakdown, and error class analysis.
+    Args:
+        buildlog_dir: Path to buildlog directory
+    Returns:
+        Dict with:
+            - summary: Overall statistics
+            - sessions: Per-session breakdown
+            - error_classes: Breakdown by error class
+    Example:
+        buildlog_experiment_report()
+    """
+    return get_experiment_report(Path(buildlog_dir))

buildlog/seed_engine/__init__.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""Seed Engine - Formalized pipeline for creating reviewer personas.
+The seed engine abstracts the 4-step process for bootstrapping
+defensible reviewer personas from authoritative domain sources:
+    1. SOURCE IDENTIFICATION - Define authoritative sources
+    2. RULE EXTRACTION - Extract candidate rules with defensibility fields
+    3. CATEGORIZATION - Map rules to persona concern categories
+    4. SEED GENERATION - Output validated YAML seed file
+Usage:
+    from buildlog.seed_engine import Pipeline, Source, SourceType
+    # Define sources
+    sources = [
+        Source(
+            name="OWASP Top 10",
+            url="https://owasp.org/Top10/",
+            source_type=SourceType.REFERENCE_DOC,
+            domain="security",
+        )
+    ]
+    # Run pipeline
+    pipeline = Pipeline(persona="security_karen")
+    seed_file = pipeline.run(sources)
+"""
+from buildlog.seed_engine.categorizers import (
+    Categorizer,
+    CategoryMapping,
+    TagBasedCategorizer,
+)
+from buildlog.seed_engine.extractors import ManualExtractor, RuleExtractor
+from buildlog.seed_engine.generators import SeedGenerator
+from buildlog.seed_engine.models import (
+    CandidateRule,
+    CategorizedRule,
+    Source,
+    SourceType,
+)
+from buildlog.seed_engine.pipeline import Pipeline
+from buildlog.seed_engine.sources import (
+    FetchStatus,
+    SourceEntry,
+    SourceFetcher,
+    SourceManifest,
+    url_to_cache_filename,
+)
+__all__ = [
+    # Models
+    "Source",
+    "SourceType",
+    "CandidateRule",
+    "CategorizedRule",
+    # Pipeline
+    "Pipeline",
+    # Extractors
+    "RuleExtractor",
+    "ManualExtractor",
+    # Categorizers
+    "Categorizer",
+    "TagBasedCategorizer",
+    "CategoryMapping",
+    # Generators
+    "SeedGenerator",
+    # Sources
+    "FetchStatus",
+    "SourceEntry",
+    "SourceManifest",
+    "SourceFetcher",
+    "url_to_cache_filename",
+]

buildlog/seed_engine/categorizers.py ADDED Viewed

@@ -0,0 +1,145 @@
+"""Rule categorizers for Step 3 of the seed engine pipeline.
+Categorizers take candidate rules and assign final categories and tags.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from dataclasses import dataclass
+from buildlog.seed_engine.models import CandidateRule, CategorizedRule
+class Categorizer(ABC):
+    """Abstract interface for categorizing rules (pipeline Step 3).
+    This is an ABC rather than a typing.Protocol: a subclass has to
+    implement categorize() before it can be instantiated.
+    Implementations:
+    - TagBasedCategorizer: Category from tags/keywords
+    - MappingCategorizer: Explicit source→category mapping
+    """
+    @abstractmethod
+    def categorize(self, rule: CandidateRule) -> CategorizedRule:
+        """Assign category and final tags to a rule.
+        Args:
+            rule: The candidate rule to categorize.
+        Returns:
+            Categorized rule ready for seed generation.
+        """
+        # Abstract: concrete categorizers provide the body.
+        ...
+@dataclass
+class CategoryMapping:
+    """Mapping from keywords/tags to category.
+    TagBasedCategorizer assigns `category` to a rule when any entry of
+    `keywords`, lowercased, equals one of the rule's normalized tags or
+    occurs as a substring of the lowercased rule text.
+    """
+    # Category name assigned when this mapping matches.
+    category: str
+    keywords: list[str]  # If any of these appear in tags/rule, assign this category
+    # TagBasedCategorizer tries mappings in descending priority order.
+    priority: int = 0  # Higher priority wins on conflicts
+class TagBasedCategorizer(Categorizer):
+    """Categorize rules based on their tags and keywords.
+    Usage:
+        categorizer = TagBasedCategorizer(
+            default_category="testing",
+            mappings=[
+                CategoryMapping("coverage", ["coverage", "untested"]),
+                CategoryMapping("isolation", ["flaky", "order", "hermetic"]),
+                CategoryMapping("assertions", ["assert", "expect", "verify"]),
+            ],
+            tag_normalizer=lambda t: t.lower().replace("-", "_"),
+        )
+        categorized = categorizer.categorize(candidate_rule)
+    """
+    def __init__(
+        self,
+        default_category: str,
+        mappings: list[CategoryMapping] | None = None,
+        tag_normalizer: Callable[[str], str] | None = None,
+        additional_tags: list[str] | None = None,
+    ) -> None:
+        self.default_category = default_category
+        self.mappings = sorted(mappings or [], key=lambda m: m.priority, reverse=True)
+        self.tag_normalizer = tag_normalizer or (lambda t: t.lower())
+        self.additional_tags = additional_tags or []
+    def categorize(self, rule: CandidateRule) -> CategorizedRule:
+        """Assign category based on tag matching."""
+        # Normalize tags
+        normalized_tags = [self.tag_normalizer(t) for t in rule.raw_tags]
+        # Also check rule text for keywords
+        rule_text_lower = rule.rule.lower()
+        # Find matching category
+        category = self.default_category
+        for mapping in self.mappings:
+            for keyword in mapping.keywords:
+                keyword_lower = keyword.lower()
+                if keyword_lower in normalized_tags or keyword_lower in rule_text_lower:
+                    category = mapping.category
+                    break
+            else:
+                continue
+            break
+        # Build final tags
+        final_tags = list(set(normalized_tags + self.additional_tags))
+        return CategorizedRule.from_candidate(
+            candidate=rule,
+            category=category,
+            tags=final_tags,
+        )
+class MappingCategorizer(Categorizer):
+    """Categorize rules via explicit source→category mapping.
+    Useful when sources map directly to categories
+    (e.g., OWASP A03 → "injection").
+    Usage:
+        categorizer = MappingCategorizer(
+            source_category_map={
+                "https://owasp.org/Top10/A03": "injection",
+                "https://owasp.org/Top10/A01": "access-control",
+            },
+            default_category="security",
+        )
+    """
+    def __init__(
+        self,
+        source_category_map: dict[str, str],
+        default_category: str,
+        tag_transform: Callable[[list[str]], list[str]] | None = None,
+    ) -> None:
+        self.source_category_map = source_category_map
+        self.default_category = default_category
+        self.tag_transform = tag_transform or (lambda tags: tags)
+    def categorize(self, rule: CandidateRule) -> CategorizedRule:
+        """Assign category based on source URL."""
+        # Find category by matching source URL prefix
+        category = self.default_category
+        for url_prefix, cat in self.source_category_map.items():
+            if rule.source.url.startswith(url_prefix):
+                category = cat
+                break
+        final_tags = self.tag_transform(rule.raw_tags)
+        return CategorizedRule.from_candidate(
+            candidate=rule,
+            category=category,
+            tags=final_tags,
+        )

buildlog/seed_engine/extractors.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Rule extractors for Step 2 of the seed engine pipeline.
+Extractors take sources and produce candidate rules.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Callable
+from buildlog.seed_engine.models import CandidateRule, Source
+class RuleExtractor(ABC):
+    """Abstract interface for extracting rules from sources (pipeline Step 2).
+    This is an ABC rather than a typing.Protocol: a subclass has to
+    implement both extract() and validate() before it can be instantiated.
+    Implementations:
+    - ManualExtractor: Human-curated rules (highest quality)
+    - FunctionExtractor: Delegates to a caller-supplied extraction function
+    - LLMExtractor: LLM-assisted extraction (future)
+    - StructuredExtractor: Parse structured docs like OWASP (future)
+    """
+    @abstractmethod
+    def extract(self, source: Source) -> list[CandidateRule]:
+        """Extract candidate rules from a source.
+        Args:
+            source: The source to extract rules from.
+        Returns:
+            List of candidate rules with defensibility fields.
+        """
+        # Abstract: concrete extractors provide the body.
+        ...
+    @abstractmethod
+    def validate(self, rule: CandidateRule) -> list[str]:
+        """Validate a candidate rule, returning any issues.
+        Args:
+            rule: The rule to validate.
+        Returns:
+            List of validation issues (empty if valid).
+        """
+        # Abstract: concrete extractors provide the body.
+        ...
+class ManualExtractor(RuleExtractor):
+    """Manual rule extraction via human curation.
+    This is the gold standard—humans read the source and
+    extract rules with full defensibility metadata.
+    Usage:
+        extractor = ManualExtractor()
+        # Register rules for a source
+        extractor.register(
+            source=google_testing_blog,
+            rules=[
+                CandidateRule(
+                    rule="Tests must not depend on execution order",
+                    context="Test suites with multiple tests",
+                    antipattern="Test A sets state that Test B relies on",
+                    rationale="Order-dependent tests are flaky",
+                    source=google_testing_blog,
+                    raw_tags=["isolation", "flaky"],
+                )
+            ]
+        )
+        # Extract returns registered rules
+        rules = extractor.extract(google_testing_blog)
+    """
+    def __init__(self) -> None:
+        self._rules_by_source: dict[str, list[CandidateRule]] = {}
+    def register(self, source: Source, rules: list[CandidateRule]) -> None:
+        """Register manually curated rules for a source.
+        Args:
+            source: The source these rules come from.
+            rules: The curated rules.
+        """
+        # Validate all rules are complete
+        for rule in rules:
+            issues = self.validate(rule)
+            if issues:
+                raise ValueError(
+                    f"Invalid rule '{rule.rule[:50]}...': {'; '.join(issues)}"
+                )
+        self._rules_by_source[source.url] = rules
+    def extract(self, source: Source) -> list[CandidateRule]:
+        """Return registered rules for this source."""
+        return self._rules_by_source.get(source.url, [])
+    def validate(self, rule: CandidateRule) -> list[str]:
+        """Validate defensibility fields are populated."""
+        issues = []
+        if not rule.rule.strip():
+            issues.append("Rule text is empty")
+        if not rule.context.strip():
+            issues.append("Context is required for defensibility")
+        if not rule.antipattern.strip():
+            issues.append("Antipattern is required for defensibility")
+        if not rule.rationale.strip():
+            issues.append("Rationale is required for defensibility")
+        return issues
+class FunctionExtractor(RuleExtractor):
+    """Extraction via custom function (for structured sources).
+    Allows plugging in custom extraction logic for sources
+    with known structure (e.g., OWASP pages, API docs).
+    Usage:
+        def extract_from_owasp(source: Source) -> list[CandidateRule]:
+            # Custom parsing logic for OWASP format
+            ...
+        extractor = FunctionExtractor(extract_from_owasp)
+        rules = extractor.extract(owasp_source)
+    """
+    def __init__(
+        self,
+        extract_fn: Callable[[Source], list[CandidateRule]],
+        validate_fn: Callable[[CandidateRule], list[str]] | None = None,
+    ) -> None:
+        self._extract_fn = extract_fn
+        self._validate_fn = validate_fn or self._default_validate
+    def extract(self, source: Source) -> list[CandidateRule]:
+        """Run the custom extraction function."""
+        return self._extract_fn(source)
+    def validate(self, rule: CandidateRule) -> list[str]:
+        """Run the validation function."""
+        return self._validate_fn(rule)
+    def _default_validate(self, rule: CandidateRule) -> list[str]:
+        """Default validation: check completeness."""
+        if not rule.is_complete():
+            return ["Rule is missing required defensibility fields"]
+        return []

buildlog 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

buildlog 0.4.0py3-none-any.whl → 0.6.0py3-none-any.whl