buildlog 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ """Render skills to Anthropic Agent Skills format.
2
+
3
+ Creates .claude/skills/buildlog-learned/SKILL.md that can be loaded
4
+ on-demand by Claude Code and other Anthropic tools.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ from buildlog.render.tracking import track_promoted
14
+
15
+ if TYPE_CHECKING:
16
+ from buildlog.skills import Skill
17
+
18
+
19
class SkillRenderer:
    """Creates .claude/skills/buildlog-learned/SKILL.md

    This renderer produces Anthropic Agent Skills format, which allows
    for on-demand loading of project-specific patterns by Claude.
    """

    def __init__(
        self,
        path: Path | None = None,
        tracking_path: Path | None = None,
        skill_name: str = "buildlog-learned",
    ):
        """Initialize renderer.

        Args:
            path: Path to SKILL.md file. Defaults to .claude/skills/{skill_name}/SKILL.md.
            tracking_path: Path to promoted.json tracking file.
                Defaults to .buildlog/promoted.json.
            skill_name: Name of the skill directory. Defaults to "buildlog-learned".
                Must not contain path separators or parent references.

        Raises:
            ValueError: If skill_name contains path traversal characters.
        """
        # Security: Validate skill_name to prevent path traversal
        if "/" in skill_name or "\\" in skill_name or ".." in skill_name:
            raise ValueError(
                f"Invalid skill_name: {skill_name!r}. "
                "Must not contain path separators or '..'."
            )
        self.skill_name = skill_name
        self.path = path or Path(f".claude/skills/{skill_name}/SKILL.md")
        self.tracking_path = tracking_path or Path(".buildlog/promoted.json")

    def render(self, skills: list[Skill]) -> str:
        """Render skills to SKILL.md format.

        Writes the file at ``self.path`` (creating parent directories) and
        records the promoted skill IDs via ``track_promoted``.

        Args:
            skills: List of skills to render.

        Returns:
            Confirmation message describing what was written.
        """
        if not skills:
            return "No skills to promote"

        # Group by confidence, then category
        by_confidence: dict[str, dict[str, list[Skill]]] = {
            "high": {},
            "medium": {},
            "low": {},
        }
        for skill in skills:
            conf = skill.confidence
            cat = skill.category
            by_confidence[conf].setdefault(cat, []).append(skill)

        # Build SKILL.md content (YAML frontmatter + sections per confidence)
        categories = sorted({s.category for s in skills})
        category_display = ", ".join(self._category_title(c) for c in categories)

        lines = [
            "---",
            f"name: {self.skill_name}",
            f"description: Project-specific patterns learned from development history. "
            f"Use when writing code, making architectural decisions, reviewing PRs, "
            f"or ensuring consistency. Contains {len(skills)} rules across "
            f"{category_display}.",
            "---",
            "",
            "# Learned Patterns",
            "",
            f"*{len(skills)} rules extracted from buildlog entries on "
            f"{datetime.now().strftime('%Y-%m-%d')}*",
            "",
        ]

        # High confidence = Must Follow
        if by_confidence["high"]:
            lines.extend(
                self._render_confidence_section(
                    "Must Follow (High Confidence)",
                    "These patterns have been reinforced multiple times.",
                    by_confidence["high"],
                )
            )

        # Medium confidence = Should Consider
        if by_confidence["medium"]:
            lines.extend(
                self._render_confidence_section(
                    "Should Consider (Medium Confidence)",
                    "These patterns appear frequently but may have exceptions.",
                    by_confidence["medium"],
                )
            )

        # Low confidence = Worth Knowing
        if by_confidence["low"]:
            lines.extend(
                self._render_confidence_section(
                    "Worth Knowing (Low Confidence)",
                    "Emerging patterns worth being aware of.",
                    by_confidence["low"],
                )
            )

        content = "\n".join(lines)

        # Write file. Explicit UTF-8: the platform default encoding (e.g.
        # cp1252 on Windows) would raise UnicodeEncodeError on non-ASCII rules.
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(content, encoding="utf-8")

        # Track promoted using shared utility
        track_promoted(skills, self.tracking_path)

        return f"Created skill at {self.path}"

    def _category_title(self, category: str) -> str:
        """Convert category slug to display title."""
        titles = {
            "architectural": "Architectural",
            "workflow": "Workflow",
            "tool_usage": "Tool Usage",
            "domain_knowledge": "Domain Knowledge",
        }
        # Unknown categories fall back to "Title Case" of the slug
        return titles.get(category, category.replace("_", " ").title())

    def _render_confidence_section(
        self,
        title: str,
        description: str,
        by_category: dict[str, list[Skill]],
    ) -> list[str]:
        """Render a confidence-level section.

        Args:
            title: Section title (e.g., "Must Follow (High Confidence)").
            description: Description of what this confidence level means.
            by_category: Skills grouped by category.

        Returns:
            List of markdown lines for this section.
        """
        lines = [f"## {title}", "", description, ""]

        for category, cat_skills in sorted(by_category.items()):
            cat_title = self._category_title(category)
            lines.append(f"### {cat_title}")
            lines.append("")
            for skill in cat_skills:
                # Don't add confidence prefix - section already indicates confidence
                lines.append(f"- {skill.rule}")
            lines.append("")

        return lines
@@ -0,0 +1,43 @@
1
+ """Shared tracking utilities for render adapters."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from buildlog.skills import Skill
12
+
13
+ __all__ = ["track_promoted"]
14
+
15
+
16
def track_promoted(skills: list[Skill], tracking_path: Path) -> None:
    """Track which skills have been promoted.

    Writes skill IDs and promotion timestamps to a JSON file.
    Handles corrupt or malformed JSON gracefully by starting fresh.

    Args:
        skills: Skills that were promoted.
        tracking_path: Path to the tracking JSON file.
    """
    tracking_path.parent.mkdir(parents=True, exist_ok=True)

    # Load existing tracking data. Start fresh when the file is corrupt OR
    # when it holds valid JSON of the wrong shape (a list, a string, a dict
    # missing keys): guarding only against JSONDecodeError would let those
    # cases raise TypeError/KeyError below.
    tracking: dict = {"skill_ids": [], "promoted_at": {}}
    if tracking_path.exists():
        try:
            loaded = json.loads(tracking_path.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            loaded = None  # Start fresh if corrupted
        if isinstance(loaded, dict):
            tracking = loaded
            # Repair partially-written files that lack one of the keys
            tracking.setdefault("skill_ids", [])
            tracking.setdefault("promoted_at", {})

    # Add new skill IDs; a set makes the duplicate check O(1) per skill
    # while skill_ids stays a list to preserve promotion order in the file.
    now = datetime.now().isoformat()
    seen = set(tracking["skill_ids"])
    for skill in skills:
        if skill.id not in seen:
            seen.add(skill.id)
            tracking["skill_ids"].append(skill.id)
            tracking["promoted_at"][skill.id] = now

    tracking_path.write_text(json.dumps(tracking, indent=2), encoding="utf-8")
buildlog/skills.py CHANGED
@@ -19,7 +19,7 @@ import json
19
19
  import logging
20
20
  import re
21
21
  from dataclasses import dataclass, field
22
- from datetime import UTC, date, datetime
22
+ from datetime import date, datetime, timezone
23
23
  from pathlib import Path
24
24
  from typing import Final, Literal, TypedDict
25
25
 
@@ -134,8 +134,6 @@ def _generate_skill_id(category: str, rule: str) -> str:
134
134
  return f"{prefix}-{rule_hash}"
135
135
 
136
136
 
137
-
138
-
139
137
  def _calculate_confidence(
140
138
  frequency: int,
141
139
  most_recent_date: date | None,
@@ -164,7 +162,10 @@ def _calculate_confidence(
164
162
  if most_recent_date:
165
163
  recency_days = (reference_date - most_recent_date).days
166
164
 
167
- if frequency >= HIGH_CONFIDENCE_FREQUENCY and recency_days < HIGH_CONFIDENCE_RECENCY_DAYS:
165
+ if (
166
+ frequency >= HIGH_CONFIDENCE_FREQUENCY
167
+ and recency_days < HIGH_CONFIDENCE_RECENCY_DAYS
168
+ ):
168
169
  return "high"
169
170
  elif frequency >= MEDIUM_CONFIDENCE_FREQUENCY:
170
171
  return "medium"
@@ -179,12 +180,44 @@ def _extract_tags(rule: str) -> list[str]:
179
180
  """
180
181
  # Common tech/concept terms to extract as tags
181
182
  known_tags = {
182
- "api", "http", "json", "yaml", "sql", "database", "cache",
183
- "redis", "supabase", "postgres", "mongodb", "git", "docker",
184
- "kubernetes", "aws", "gcp", "azure", "react", "python",
185
- "typescript", "javascript", "rust", "go", "test", "testing",
186
- "ci", "cd", "deploy", "error", "retry", "timeout", "auth",
187
- "jwt", "oauth", "plugin", "middleware", "async", "sync",
183
+ "api",
184
+ "http",
185
+ "json",
186
+ "yaml",
187
+ "sql",
188
+ "database",
189
+ "cache",
190
+ "redis",
191
+ "supabase",
192
+ "postgres",
193
+ "mongodb",
194
+ "git",
195
+ "docker",
196
+ "kubernetes",
197
+ "aws",
198
+ "gcp",
199
+ "azure",
200
+ "react",
201
+ "python",
202
+ "typescript",
203
+ "javascript",
204
+ "rust",
205
+ "go",
206
+ "test",
207
+ "testing",
208
+ "ci",
209
+ "cd",
210
+ "deploy",
211
+ "error",
212
+ "retry",
213
+ "timeout",
214
+ "auth",
215
+ "jwt",
216
+ "oauth",
217
+ "plugin",
218
+ "middleware",
219
+ "async",
220
+ "sync",
188
221
  }
189
222
 
190
223
  # Word variants that map to canonical tags
@@ -301,7 +334,11 @@ def generate_skills(
301
334
  result = distill_all(buildlog_dir, since=since_date)
302
335
 
303
336
  # Get embedding backend
304
- backend = get_backend(embedding_backend) if embedding_backend else get_default_backend()
337
+ backend = (
338
+ get_backend(embedding_backend) # type: ignore[arg-type]
339
+ if embedding_backend
340
+ else get_default_backend()
341
+ )
305
342
  logger.info("Using embedding backend: %s", backend.name)
306
343
 
307
344
  skills_by_category: dict[str, list[Skill]] = {}
@@ -331,7 +368,7 @@ def generate_skills(
331
368
  skills_by_category[category] = skills
332
369
 
333
370
  return SkillSet(
334
- generated_at=datetime.now(UTC).isoformat().replace("+00:00", "Z"),
371
+ generated_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
335
372
  source_entries=result.entry_count,
336
373
  skills=skills_by_category,
337
374
  )
@@ -347,7 +384,9 @@ def _format_yaml(skill_set: SkillSet) -> str:
347
384
  ) from e
348
385
 
349
386
  data = skill_set.to_dict()
350
- return yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
387
+ return yaml.dump(
388
+ data, default_flow_style=False, allow_unicode=True, sort_keys=False
389
+ )
351
390
 
352
391
 
353
392
  def _format_json(skill_set: SkillSet) -> str:
@@ -361,8 +400,10 @@ def _format_markdown(skill_set: SkillSet) -> str:
361
400
 
362
401
  lines.append("## Learned Skills")
363
402
  lines.append("")
364
- lines.append(f"Based on {skill_set.source_entries} buildlog entries, "
365
- f"{skill_set.total_skills} actionable skills have emerged:")
403
+ lines.append(
404
+ f"Based on {skill_set.source_entries} buildlog entries, "
405
+ f"{skill_set.total_skills} actionable skills have emerged:"
406
+ )
366
407
  lines.append("")
367
408
 
368
409
  category_titles = {
@@ -384,7 +425,9 @@ def _format_markdown(skill_set: SkillSet) -> str:
384
425
  confidence_badge = {"high": "🟢", "medium": "🟡", "low": "⚪"}.get(
385
426
  skill.confidence, ""
386
427
  )
387
- freq_text = f"seen {skill.frequency}x" if skill.frequency > 1 else "seen once"
428
+ freq_text = (
429
+ f"seen {skill.frequency}x" if skill.frequency > 1 else "seen once"
430
+ )
388
431
  lines.append(f"- {confidence_badge} **{skill.rule}** ({freq_text})")
389
432
 
390
433
  lines.append("")
@@ -397,16 +440,23 @@ def _format_markdown(skill_set: SkillSet) -> str:
397
440
 
398
441
  # Pre-compiled patterns for _to_imperative (module-level for efficiency)
399
442
  _NEGATIVE_PATTERNS = tuple(
400
- re.compile(p) for p in (
401
- r"\bdon't\b", r"\bdo not\b", r"\bnever\b", r"\bavoid\b",
402
- r"\bstop\b", r"\bshouldn't\b", r"\bshould not\b",
443
+ re.compile(p)
444
+ for p in (
445
+ r"\bdon't\b",
446
+ r"\bdo not\b",
447
+ r"\bnever\b",
448
+ r"\bavoid\b",
449
+ r"\bstop\b",
450
+ r"\bshouldn't\b",
451
+ r"\bshould not\b",
403
452
  )
404
453
  )
405
454
 
406
455
  # Comparison patterns - intentionally narrow to avoid false positives
407
456
  # "over" alone matches "all over", "game over" etc. so we require context
408
457
  _COMPARISON_PATTERNS = tuple(
409
- re.compile(p) for p in (
458
+ re.compile(p)
459
+ for p in (
410
460
  r"\binstead of\b",
411
461
  r"\brather than\b",
412
462
  r"\bbetter than\b",
@@ -416,10 +466,22 @@ _COMPARISON_PATTERNS = tuple(
416
466
 
417
467
  # Verbs that need -ing form when following "Avoid" or bare "Prefer"
418
468
  _VERB_TO_GERUND: Final[dict[str, str]] = {
419
- "use": "using", "run": "running", "make": "making", "write": "writing",
420
- "read": "reading", "put": "putting", "get": "getting", "set": "setting",
421
- "add": "adding", "create": "creating", "delete": "deleting", "call": "calling",
422
- "pass": "passing", "send": "sending", "store": "storing", "cache": "caching",
469
+ "use": "using",
470
+ "run": "running",
471
+ "make": "making",
472
+ "write": "writing",
473
+ "read": "reading",
474
+ "put": "putting",
475
+ "get": "getting",
476
+ "set": "setting",
477
+ "add": "adding",
478
+ "create": "creating",
479
+ "delete": "deleting",
480
+ "call": "calling",
481
+ "pass": "passing",
482
+ "send": "sending",
483
+ "store": "storing",
484
+ "cache": "caching",
423
485
  }
424
486
 
425
487
 
@@ -456,8 +518,14 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
456
518
 
457
519
  # Already has a confidence modifier - just capitalize and return
458
520
  confidence_modifiers = (
459
- "always", "never", "prefer", "avoid", "consider", "remember",
460
- "don't", "do not",
521
+ "always",
522
+ "never",
523
+ "prefer",
524
+ "avoid",
525
+ "consider",
526
+ "remember",
527
+ "don't",
528
+ "do not",
461
529
  )
462
530
  if any(rule_lower.startswith(word) for word in confidence_modifiers):
463
531
  return rule[0].upper() + rule[1:]
@@ -485,16 +553,23 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
485
553
  # Clean up the rule for prefixing
486
554
  # Remove leading "should" type words (order matters - longer first)
487
555
  cleaners = [
488
- "you shouldn't ", "we shouldn't ", "shouldn't ",
489
- "you should not ", "we should not ", "should not ",
490
- "you should ", "we should ", "should ",
491
- "it's better to ", "it is better to ",
556
+ "you shouldn't ",
557
+ "we shouldn't ",
558
+ "shouldn't ",
559
+ "you should not ",
560
+ "we should not ",
561
+ "should not ",
562
+ "you should ",
563
+ "we should ",
564
+ "should ",
565
+ "it's better to ",
566
+ "it is better to ",
492
567
  ]
493
568
  cleaned = rule
494
569
  cleaned_lower = rule_lower
495
570
  for cleaner in cleaners:
496
571
  if cleaned_lower.startswith(cleaner):
497
- cleaned = cleaned[len(cleaner):]
572
+ cleaned = cleaned[len(cleaner) :]
498
573
  cleaned_lower = cleaned.lower()
499
574
  break
500
575
 
@@ -505,10 +580,12 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
505
580
 
506
581
  # Avoid double words: "Avoid avoid using..." -> "Avoid using..."
507
582
  prefix_lower = prefix.lower()
508
- if cleaned_lower.startswith(prefix_lower + " ") or cleaned_lower.startswith(prefix_lower + "ing "):
583
+ if cleaned_lower.startswith(prefix_lower + " ") or cleaned_lower.startswith(
584
+ prefix_lower + "ing "
585
+ ):
509
586
  first_space = cleaned.find(" ")
510
587
  if first_space > 0:
511
- cleaned = cleaned[first_space + 1:]
588
+ cleaned = cleaned[first_space + 1 :]
512
589
  cleaned_lower = cleaned.lower()
513
590
 
514
591
  # For "Avoid" and bare "Prefer", convert leading verbs to gerund form
@@ -518,7 +595,7 @@ def _to_imperative(rule: str, confidence: ConfidenceLevel) -> str:
518
595
  first_word = cleaned_lower.split()[0] if cleaned_lower else ""
519
596
  if first_word in _VERB_TO_GERUND:
520
597
  gerund = _VERB_TO_GERUND[first_word]
521
- cleaned = gerund + cleaned[len(first_word):]
598
+ cleaned = gerund + cleaned[len(first_word) :]
522
599
  cleaned_lower = cleaned.lower()
523
600
 
524
601
  # Lowercase first char if we're adding a prefix (but not for gerunds which are already lower)
@@ -538,8 +615,10 @@ def _format_rules(skill_set: SkillSet) -> str:
538
615
 
539
616
  lines.append("# Project Rules")
540
617
  lines.append("")
541
- lines.append(f"*Auto-generated from {skill_set.source_entries} buildlog entries. "
542
- f"{skill_set.total_skills} rules extracted.*")
618
+ lines.append(
619
+ f"*Auto-generated from {skill_set.source_entries} buildlog entries. "
620
+ f"{skill_set.total_skills} rules extracted.*"
621
+ )
543
622
  lines.append("")
544
623
 
545
624
  # Collect all skills, sort by confidence then frequency
@@ -625,6 +704,8 @@ def format_skills(skill_set: SkillSet, fmt: OutputFormat = "yaml") -> str:
625
704
 
626
705
  formatter = formatters.get(fmt)
627
706
  if formatter is None:
628
- raise ValueError(f"Unknown format: {fmt}. Must be one of: {list(formatters.keys())}")
707
+ raise ValueError(
708
+ f"Unknown format: {fmt}. Must be one of: {list(formatters.keys())}"
709
+ )
629
710
 
630
711
  return formatter(skill_set)
buildlog/stats.py CHANGED
@@ -12,7 +12,7 @@ __all__ = [
12
12
  import json
13
13
  import logging
14
14
  from dataclasses import dataclass, field
15
- from datetime import UTC, date, datetime, timedelta
15
+ from datetime import date, datetime, timedelta, timezone
16
16
  from itertools import takewhile
17
17
  from pathlib import Path
18
18
  from typing import Final, NamedTuple, TypedDict
@@ -315,7 +315,9 @@ def calculate_stats(
315
315
  # Parse all entries using functional map/filter pattern
316
316
  parsed_or_none = [
317
317
  _parse_entry(entry_path, date_str)
318
- for entry_path, date_str in iter_buildlog_entries(buildlog_dir, since=since_date)
318
+ for entry_path, date_str in iter_buildlog_entries(
319
+ buildlog_dir, since=since_date
320
+ )
319
321
  ]
320
322
  entries = [e for e in parsed_or_none if e is not None]
321
323
 
@@ -326,8 +328,8 @@ def calculate_stats(
326
328
 
327
329
  entry_dates = [e.entry_date for e in entries if e.entry_date]
328
330
 
329
- this_week = sum(1 for d in entry_dates if d and d >= week_ago)
330
- this_month = sum(1 for d in entry_dates if d and d >= month_start)
331
+ this_week = sum(1 for d in entry_dates if d and d >= week_ago) # type: ignore[misc]
332
+ this_month = sum(1 for d in entry_dates if d and d >= month_start) # type: ignore[misc]
331
333
 
332
334
  with_improvements = sum(1 for e in entries if e.has_improvements)
333
335
  coverage_percent = int((with_improvements / len(entries) * 100) if entries else 0)
@@ -351,7 +353,7 @@ def calculate_stats(
351
353
  warnings.insert(0, "No buildlog entries found")
352
354
 
353
355
  return BuildlogStats(
354
- generated_at=datetime.now(UTC).isoformat().replace("+00:00", "Z"),
356
+ generated_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
355
357
  entries=EntryStats(
356
358
  total=len(entries),
357
359
  this_week=this_week,