buildlog 0.4.0-py3-none-any.whl → 0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. buildlog/cli.py +799 -3
  2. buildlog/core/__init__.py +34 -0
  3. buildlog/core/operations.py +925 -0
  4. buildlog/mcp/server.py +16 -0
  5. buildlog/mcp/tools.py +266 -1
  6. buildlog/seed_engine/__init__.py +74 -0
  7. buildlog/seed_engine/categorizers.py +145 -0
  8. buildlog/seed_engine/extractors.py +148 -0
  9. buildlog/seed_engine/generators.py +144 -0
  10. buildlog/seed_engine/models.py +113 -0
  11. buildlog/seed_engine/pipeline.py +202 -0
  12. buildlog/seed_engine/sources.py +362 -0
  13. buildlog/seeds.py +211 -0
  14. buildlog/skills.py +26 -3
  15. buildlog-0.6.0.dist-info/METADATA +490 -0
  16. buildlog-0.6.0.dist-info/RECORD +38 -0
  17. buildlog-0.4.0.dist-info/METADATA +0 -894
  18. buildlog-0.4.0.dist-info/RECORD +0 -30
  19. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/copier.yml +0 -0
  20. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/post_gen.py +0 -0
  21. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  22. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  23. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  24. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  25. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  26. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/WHEEL +0 -0
  27. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/entry_points.txt +0 -0
  28. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/licenses/LICENSE +0 -0
buildlog/seed_engine/generators.py
@@ -0,0 +1,144 @@
+"""Seed file generators for Step 4 of the seed engine pipeline.
+
+Generators take categorized rules and produce the final seed file.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from buildlog.seed_engine.models import CategorizedRule
+
+
+@dataclass
+class SeedGenerator:
+    """Generate YAML seed files from categorized rules.
+
+    Usage:
+        generator = SeedGenerator(
+            persona="test_terrorist",
+            version=1,
+            output_dir=Path(".buildlog/seeds"),
+        )
+
+        seed_file = generator.generate(categorized_rules)
+        generator.write(seed_file)
+    """
+
+    persona: str
+    version: int = 1
+    output_dir: Path | None = None
+    header_comment: str | None = None
+
+    def generate(self, rules: list[CategorizedRule]) -> dict[str, Any]:
+        """Generate seed file dictionary from categorized rules.
+
+        Args:
+            rules: The categorized rules to include.
+
+        Returns:
+            Seed file as dictionary (ready for YAML serialization).
+        """
+        # Validate all rules are complete
+        incomplete = [r for r in rules if not self._is_complete(r)]
+        if incomplete:
+            raise ValueError(
+                f"{len(incomplete)} rules are incomplete. "
+                f"First: '{incomplete[0].rule[:50]}...'"
+            )
+
+        return {
+            "persona": self.persona,
+            "version": self.version,
+            "rules": [r.to_seed_dict() for r in rules],
+        }
+
+    def write(
+        self,
+        seed_data: dict[str, Any],
+        path: Path | None = None,
+    ) -> Path:
+        """Write seed file to disk.
+
+        Args:
+            seed_data: The seed file dictionary.
+            path: Output path. If None, uses output_dir/persona.yaml.
+
+        Returns:
+            Path to written file.
+        """
+        if path is None:
+            if self.output_dir is None:
+                raise ValueError("No output path or output_dir specified")
+            self.output_dir.mkdir(parents=True, exist_ok=True)
+            path = self.output_dir / f"{self.persona}.yaml"
+
+        # Build YAML content with optional header
+        yaml_content = yaml.dump(
+            seed_data,
+            default_flow_style=False,
+            allow_unicode=True,
+            sort_keys=False,
+            width=100,
+        )
+
+        # Add header comment if provided
+        if self.header_comment:
+            lines = [f"# {line}" for line in self.header_comment.split("\n")]
+            header = "\n".join(lines) + "\n\n"
+            yaml_content = header + yaml_content
+
+        path.write_text(yaml_content)
+        return path
+
+    def _is_complete(self, rule: CategorizedRule) -> bool:
+        """Check if a rule has all required fields."""
+        return bool(
+            rule.rule.strip()
+            and rule.context.strip()
+            and rule.antipattern.strip()
+            and rule.rationale.strip()
+            and rule.category.strip()
+        )
+
+    def validate(self, seed_data: dict[str, Any]) -> list[str]:
+        """Validate seed file structure.
+
+        Args:
+            seed_data: The seed file dictionary.
+
+        Returns:
+            List of validation issues (empty if valid).
+        """
+        issues = []
+
+        if "persona" not in seed_data:
+            issues.append("Missing 'persona' field")
+        if "version" not in seed_data:
+            issues.append("Missing 'version' field")
+        if "rules" not in seed_data:
+            issues.append("Missing 'rules' field")
+            return issues
+
+        for i, rule in enumerate(seed_data.get("rules", [])):
+            prefix = f"Rule {i + 1}"
+            if not rule.get("rule"):
+                issues.append(f"{prefix}: Missing 'rule' text")
+            if not rule.get("context"):
+                issues.append(
+                    f"{prefix}: Missing 'context' (required for defensibility)"
+                )
+            if not rule.get("antipattern"):
+                issues.append(
+                    f"{prefix}: Missing 'antipattern' (required for defensibility)"
+                )
+            if not rule.get("rationale"):
+                issues.append(
+                    f"{prefix}: Missing 'rationale' (required for defensibility)"
+                )
+
+        return issues
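
For orientation, here is a minimal sketch of how the SeedGenerator added above might be driven. The rule text, tags, and the example.com reference are illustrative placeholders; only the class names, fields, and methods (SeedGenerator, CategorizedRule, generate, validate, write) come from this diff.

from pathlib import Path

from buildlog.seed_engine.generators import SeedGenerator
from buildlog.seed_engine.models import CategorizedRule

# Illustrative rule; field names match CategorizedRule in the models.py hunk below.
rule = CategorizedRule(
    rule="Tests must not depend on execution order",
    category="testing",
    context="Any suite run under a parallel or randomized test runner",
    antipattern="Test B reads state that only exists because Test A ran first",
    rationale="Order-dependent tests hide real failures and block parallelization",
    tags=["isolation", "determinism"],
    references=[{"url": "https://example.com/testing-guide", "title": "Placeholder reference"}],
)

generator = SeedGenerator(persona="test_terrorist", output_dir=Path(".buildlog/seeds"))
seed_data = generator.generate([rule])   # raises ValueError if any rule is incomplete

issues = generator.validate(seed_data)   # [] when persona, version, and rules are present
if not issues:
    path = generator.write(seed_data)    # writes .buildlog/seeds/test_terrorist.yaml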
buildlog/seed_engine/models.py
@@ -0,0 +1,113 @@
+"""Data models for the seed engine pipeline."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any
+
+
+class SourceType(Enum):
+    """Type of authoritative source."""
+
+    REFERENCE_DOC = "reference_doc"  # OWASP, RFC, official docs
+    BLOG_POST = "blog_post"  # Google Testing Blog, Fowler
+    BOOK = "book"  # Clean Code, xUnit Patterns
+    STANDARD = "standard"  # ISO, IEEE standards
+    CHEATSHEET = "cheatsheet"  # OWASP cheatsheets
+
+
+@dataclass
+class Source:
+    """An authoritative source for rule extraction.
+
+    Step 1 output: Sources define where domain knowledge comes from.
+    Each source should be citable and defensible.
+    """
+
+    name: str
+    url: str
+    source_type: SourceType
+    domain: str  # e.g., "security", "testing", "code-quality"
+    description: str = ""
+    sections: list[str] = field(default_factory=list)  # Specific sections to extract
+
+    def to_reference(self) -> dict[str, str]:
+        """Convert to seed file reference format."""
+        return {"url": self.url, "title": self.name}
+
+
+@dataclass
+class CandidateRule:
+    """A rule extracted from a source, before categorization.
+
+    Step 2 output: Raw rule with all defensibility fields.
+    May not yet be categorized or tagged.
+    """
+
+    rule: str  # The prescription
+    context: str  # When it applies
+    antipattern: str  # What violation looks like
+    rationale: str  # Why it matters
+    source: Source  # Where it came from
+    raw_tags: list[str] = field(default_factory=list)  # Tags from extraction
+    confidence: float = 1.0  # Extraction confidence (1.0 for manual)
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def is_complete(self) -> bool:
+        """Check if all defensibility fields are populated."""
+        return bool(
+            self.rule.strip()
+            and self.context.strip()
+            and self.antipattern.strip()
+            and self.rationale.strip()
+        )
+
+
+@dataclass
+class CategorizedRule:
+    """A rule after categorization and tagging.

+    Step 3 output: Ready for seed file generation.
+    Has final category and tags assigned.
+    """
+
+    rule: str
+    category: str  # Final category (e.g., "testing", "security")
+    context: str
+    antipattern: str
+    rationale: str
+    tags: list[str]  # Final tags
+    references: list[dict[str, str]]  # [{"url": ..., "title": ...}]
+    confidence: float = 1.0
+
+    @classmethod
+    def from_candidate(
+        cls,
+        candidate: CandidateRule,
+        category: str,
+        tags: list[str],
+    ) -> CategorizedRule:
+        """Create from a candidate rule with assigned category/tags."""
+        return cls(
+            rule=candidate.rule,
+            category=category,
+            context=candidate.context,
+            antipattern=candidate.antipattern,
+            rationale=candidate.rationale,
+            tags=tags,
+            references=[candidate.source.to_reference()],
+            confidence=candidate.confidence,
+        )
+
+    def to_seed_dict(self) -> dict[str, Any]:
+        """Convert to seed file rule format."""
+        return {
+            "rule": self.rule,
+            "category": self.category,
+            "context": self.context,
+            "antipattern": self.antipattern,
+            "rationale": self.rationale,
+            "tags": self.tags,
+            "references": self.references,
+        }
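
As a companion sketch for the models above: build a Source and a CandidateRule, then promote it with CategorizedRule.from_candidate. The OWASP source and the rule wording are examples chosen for illustration; the field names and methods are the ones defined in this hunk.

from buildlog.seed_engine.models import CandidateRule, CategorizedRule, Source, SourceType

source = Source(
    name="OWASP Cheat Sheet Series",
    url="https://cheatsheetseries.owasp.org/",
    source_type=SourceType.CHEATSHEET,
    domain="security",
)

candidate = CandidateRule(
    rule="Parameterize every SQL query",
    context="Any code path that builds SQL from user-supplied values",
    antipattern="Concatenating request data directly into a query string",
    rationale="Parameterized queries prevent SQL injection",
    source=source,
    raw_tags=["sql", "injection"],
)
assert candidate.is_complete()  # all four defensibility fields are populated

categorized = CategorizedRule.from_candidate(candidate, category="security", tags=["sql-injection"])
categorized.to_seed_dict()      # dict in the seed file rule format, references included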
buildlog/seed_engine/pipeline.py
@@ -0,0 +1,202 @@
+"""Pipeline orchestration for the seed engine.
+
+The Pipeline ties together all 4 steps:
+1. Source identification (input)
+2. Rule extraction
+3. Categorization
+4. Seed generation
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from buildlog.seed_engine.categorizers import Categorizer, TagBasedCategorizer
+from buildlog.seed_engine.extractors import ManualExtractor, RuleExtractor
+from buildlog.seed_engine.generators import SeedGenerator
+from buildlog.seed_engine.models import CandidateRule, CategorizedRule, Source
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PipelineResult:
+    """Result of running the seed engine pipeline."""
+
+    persona: str
+    sources: list[Source]
+    candidates: list[CandidateRule]
+    categorized: list[CategorizedRule]
+    seed_data: dict[str, Any]
+    output_path: Path | None = None
+
+    @property
+    def rule_count(self) -> int:
+        return len(self.categorized)
+
+    @property
+    def source_count(self) -> int:
+        return len(self.sources)
+
+    def summary(self) -> str:
+        """Human-readable summary of the pipeline run."""
+        lines = [
+            f"Seed Engine Pipeline Result: {self.persona}",
+            f"  Sources: {self.source_count}",
+            f"  Candidates extracted: {len(self.candidates)}",
+            f"  Rules categorized: {self.rule_count}",
+        ]
+        if self.output_path:
+            lines.append(f"  Output: {self.output_path}")
+        return "\n".join(lines)
+
+
+@dataclass
+class Pipeline:
+    """The seed engine pipeline.
+
+    Orchestrates the 4-step process for creating reviewer personas:
+
+    1. SOURCES → Define authoritative domain sources
+    2. EXTRACT → Pull rules with defensibility fields
+    3. CATEGORIZE → Assign categories and tags
+    4. GENERATE → Output validated YAML seed file
+
+    Usage:
+        # Create pipeline with default components
+        pipeline = Pipeline(
+            persona="test_terrorist",
+            default_category="testing",
+        )
+
+        # Or customize each step
+        pipeline = Pipeline(
+            persona="test_terrorist",
+            extractor=MyCustomExtractor(),
+            categorizer=MyCustomCategorizer(),
+            generator=SeedGenerator(persona="test_terrorist", version=1),
+        )
+
+        # Run the pipeline
+        result = pipeline.run(sources, output_dir=Path(".buildlog/seeds"))
+    """
+
+    persona: str
+    default_category: str = "general"
+    version: int = 1
+    extractor: RuleExtractor | None = None
+    categorizer: Categorizer | None = None
+    generator: SeedGenerator | None = None
+
+    def __post_init__(self) -> None:
+        # Set defaults if not provided
+        if self.extractor is None:
+            self.extractor = ManualExtractor()
+        if self.categorizer is None:
+            self.categorizer = TagBasedCategorizer(
+                default_category=self.default_category
+            )
+        if self.generator is None:
+            self.generator = SeedGenerator(
+                persona=self.persona,
+                version=self.version,
+            )
+
+    def run(
+        self,
+        sources: list[Source],
+        output_dir: Path | None = None,
+        write: bool = True,
+    ) -> PipelineResult:
+        """Run the full pipeline.
+
+        Args:
+            sources: Step 1 - The authoritative sources to extract from.
+            output_dir: Where to write the seed file.
+            write: Whether to write the seed file to disk.
+
+        Returns:
+            PipelineResult with all intermediate and final outputs.
+        """
+        logger.info(f"Starting seed engine pipeline for '{self.persona}'")
+        logger.info(f"Processing {len(sources)} sources")
+
+        # These are guaranteed set by __post_init__
+        assert self.extractor is not None
+        assert self.categorizer is not None
+        assert self.generator is not None
+
+        # Step 2: Extract rules from each source
+        candidates: list[CandidateRule] = []
+        for source in sources:
+            extracted = self.extractor.extract(source)
+            logger.info(f"  Extracted {len(extracted)} rules from {source.name}")
+            candidates.extend(extracted)
+
+        logger.info(f"Total candidates: {len(candidates)}")
+
+        # Step 3: Categorize each rule
+        categorized: list[CategorizedRule] = []
+        for candidate in candidates:
+            cat_rule = self.categorizer.categorize(candidate)
+            categorized.append(cat_rule)
+
+        logger.info(f"Categorized {len(categorized)} rules")
+
+        # Step 4: Generate seed file
+        self.generator.output_dir = output_dir
+        seed_data = self.generator.generate(categorized)
+
+        # Optionally write to disk
+        output_path = None
+        if write and output_dir:
+            output_path = self.generator.write(seed_data)
+            logger.info(f"Wrote seed file to {output_path}")
+
+        return PipelineResult(
+            persona=self.persona,
+            sources=sources,
+            candidates=candidates,
+            categorized=categorized,
+            seed_data=seed_data,
+            output_path=output_path,
+        )
+
+    def validate_sources(self, sources: list[Source]) -> list[str]:
+        """Validate that sources are properly defined.
+
+        Args:
+            sources: The sources to validate.
+
+        Returns:
+            List of validation issues (empty if valid).
+        """
+        issues = []
+        for i, source in enumerate(sources):
+            prefix = f"Source {i + 1} ({source.name})"
+            if not source.name.strip():
+                issues.append(f"{prefix}: Missing name")
+            if not source.url.strip():
+                issues.append(f"{prefix}: Missing URL")
+            if not source.domain.strip():
+                issues.append(f"{prefix}: Missing domain")
+        return issues
+
+    def dry_run(self, sources: list[Source]) -> dict[str, Any]:
+        """Run pipeline without writing, returning preview.
+
+        Useful for validation before committing to disk.
+        """
+        result = self.run(sources, write=False)
+        return {
+            "persona": result.persona,
+            "rule_count": result.rule_count,
+            "source_count": result.source_count,
+            "categories": list(set(r.category for r in result.categorized)),
+            "sample_rules": [
+                {"rule": r.rule, "category": r.category} for r in result.categorized[:3]
+            ],
+        }
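
Finally, a hedged end-to-end sketch of the Pipeline. The extractors.py and categorizers.py modules are not shown in this diff, so StaticExtractor below is a stand-in implementing only the extract(source) -> list[CandidateRule] call the pipeline makes; the real RuleExtractor/ManualExtractor interfaces may differ. Rule text, source choice, and URLs are illustrative.

from pathlib import Path

from buildlog.seed_engine.models import CandidateRule, Source, SourceType
from buildlog.seed_engine.pipeline import Pipeline

class StaticExtractor:
    """Stand-in extractor that returns one hand-written rule per source."""

    def extract(self, source: Source) -> list[CandidateRule]:
        return [
            CandidateRule(
                rule="Name tests after the behavior they verify",
                context="Unit tests in any language",
                antipattern="test_1, test_2, test_foo_again",
                rationale="Failures should be diagnosable from the test name alone",
                source=source,
                raw_tags=["testing", "naming"],
            )
        ]

sources = [
    Source(
        name="Google Testing Blog",
        url="https://testing.googleblog.com/",
        source_type=SourceType.BLOG_POST,
        domain="testing",
    )
]

pipeline = Pipeline(persona="test_terrorist", default_category="testing", extractor=StaticExtractor())
print(pipeline.validate_sources(sources))  # [] when name, url, and domain are set
print(pipeline.dry_run(sources))           # preview: counts, categories, sample rules; nothing written
result = pipeline.run(sources, output_dir=Path(".buildlog/seeds"))
print(result.summary())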