buildlog 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
buildlog/cli.py CHANGED
@@ -172,8 +172,8 @@ def new(slug: str, entry_date: str | None):
172
172
  click.echo(f"\nOpen it: $EDITOR {entry_path}")
173
173
 
174
174
 
175
- @main.command()
176
- def list():
175
+ @main.command("list")
176
+ def list_entries():
177
177
  """List all buildlog entries."""
178
178
  buildlog_dir = Path("buildlog")
179
179
 
@@ -182,7 +182,8 @@ def list():
182
182
  raise SystemExit(1)
183
183
 
184
184
  entries = sorted(
185
- buildlog_dir.glob("20??-??-??-*.md"), reverse=True # Most recent first
185
+ buildlog_dir.glob("20??-??-??-*.md"),
186
+ reverse=True, # Most recent first
186
187
  )
187
188
 
188
189
  if not entries:
@@ -876,5 +877,380 @@ def experiment_report(output_json: bool):
876
877
  )
877
878
 
878
879
 
880
+ # -----------------------------------------------------------------------------
881
+ # Gauntlet Commands (Review Personas)
882
+ # -----------------------------------------------------------------------------
883
+
884
# Human-readable descriptions for the built-in reviewer personas, keyed by
# persona name (the same key used for the loaded seed files). Persona names
# that are not listed here are reported as "Custom persona" by `gauntlet list`.
PERSONAS: dict[str, str] = {
    "security_karen": "OWASP Top 10 security review",
    "test_terrorist": "Comprehensive testing coverage audit",
    "ruthless_reviewer": "Code quality and functional principles",
}
889
+
890
+
891
# NOTE: click renders this docstring as the `--help` text and re-wraps every
# paragraph. A line containing only "\b" (a literal backspace character, so
# the docstring must NOT be a raw string) tells click to print the following
# paragraph verbatim, which keeps the persona list and the example commands
# on separate lines.
@main.group()
def gauntlet():
    """Run the review gauntlet with curated personas.

    The gauntlet runs your code through multiple ruthless reviewers,
    each with domain-specific rules loaded from seed files.

    \b
    Personas:
      - security_karen: OWASP security review (12 rules)
      - test_terrorist: Testing coverage audit (21 rules)
      - ruthless_reviewer: Code quality review (coming soon)

    \b
    Example workflow:
      buildlog gauntlet list                  # See available personas
      buildlog gauntlet rules --persona all   # Show all rules
      buildlog gauntlet prompt src/           # Generate review prompt
    """
910
+
911
+
912
# NOTE: the docstring doubles as click's `--help` text; the "\b" line is
# click's no-rewrap marker (a literal backspace, so keep the docstring
# non-raw) and keeps each example command on its own line.
@gauntlet.command("list")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def gauntlet_list(output_json: bool):
    """List available reviewer personas and their rule counts.

    \b
    Examples:
      buildlog gauntlet list
      buildlog gauntlet list --json
    """
    import json as json_module

    from buildlog.seeds import load_all_seeds

    # Probe the seeds directories in the same order as `gauntlet rules` and
    # `gauntlet prompt` (repo-root .buildlog first, then buildlog/.buildlog),
    # so that all three commands report on the same seed set when both exist.
    seeds_dir = Path(".buildlog") / "seeds"
    if not seeds_dir.exists():
        seeds_dir = Path("buildlog") / ".buildlog" / "seeds"

    seeds = load_all_seeds(seeds_dir) or {}
    # Sort once so the JSON and the text output list personas in the same,
    # stable order.
    ordered = sorted(seeds.items())

    if output_json:
        data = {
            "personas": {
                name: {
                    "description": PERSONAS.get(name, "Custom persona"),
                    "rules_count": len(sf.rules),
                    "version": sf.version,
                }
                for name, sf in ordered
            },
            "total_rules": sum(len(sf.rules) for _, sf in ordered),
        }
        click.echo(json_module.dumps(data, indent=2))
        return

    click.echo("Review Gauntlet Personas")
    click.echo("=" * 50)

    if not ordered:
        click.echo("\nNo seed files found.")
        click.echo("Initialize with: buildlog init")
        click.echo("Or create seeds in: .buildlog/seeds/")
        return

    total = 0
    for name, sf in ordered:
        desc = PERSONAS.get(name, "Custom persona")
        click.echo(f"\n {name}")
        click.echo(f" {desc}")
        click.echo(f" Rules: {len(sf.rules)} (v{sf.version})")
        total += len(sf.rules)

    click.echo(f"\nTotal: {len(ordered)} personas, {total} rules")
968
+
969
+
970
def _gauntlet_rule_to_dict(rule, *, include_references: bool) -> dict:
    """Convert one seed rule into a plain dict for JSON/YAML export."""
    entry = {
        "rule": rule.rule,
        "category": rule.category,
        "context": rule.context,
        "antipattern": rule.antipattern,
        "rationale": rule.rationale,
        "tags": rule.tags,
    }
    if include_references:
        entry["references"] = [
            {"url": ref.url, "title": ref.title} for ref in rule.references
        ]
    return entry


def _gauntlet_seeds_to_data(seeds, *, include_references: bool) -> dict:
    """Build the ``{persona: {"version", "rules"}}`` export structure."""
    return {
        name: {
            "version": sf.version,
            "rules": [
                _gauntlet_rule_to_dict(r, include_references=include_references)
                for r in sf.rules
            ],
        }
        for name, sf in seeds.items()
    }


def _gauntlet_seeds_to_markdown(seeds) -> str:
    """Render every persona's rules as a Markdown document."""
    lines = ["# Review Gauntlet Rules\n"]
    for name, sf in seeds.items():
        lines.append(f"## {name.replace('_', ' ').title()}\n")
        lines.append(f"*{len(sf.rules)} rules, v{sf.version}*\n")
        for i, r in enumerate(sf.rules, 1):
            lines.append(f"### {i}. {r.rule}\n")
            # Two trailing spaces = Markdown hard line break, so "Tags" is
            # rendered on its own line instead of being folded into "Category".
            lines.append(f"**Category**: {r.category}  ")
            lines.append(f"**Tags**: {', '.join(r.tags)}\n")
            if r.context:
                lines.append(f"**When**: {r.context}\n")
            if r.antipattern:
                lines.append(f"**Antipattern**: {r.antipattern}\n")
            if r.rationale:
                lines.append(f"**Why**: {r.rationale}\n")
            if r.references:
                lines.append("**References**:")
                lines.extend(f"- [{ref.title}]({ref.url})" for ref in r.references)
                # Blank line terminates the reference list before the next rule.
                lines.append("")
    return "\n".join(lines)


# NOTE: the docstring doubles as click's `--help` text; the "\b" line is
# click's no-rewrap marker (a literal backspace, so keep the docstring
# non-raw) and keeps each example command on its own line.
@gauntlet.command("rules")
@click.option(
    "--persona",
    "-p",
    default="all",
    help="Persona to show rules for (or 'all')",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["yaml", "json", "markdown"]),
    default="yaml",
    help="Output format",
)
@click.option("--output", "-o", type=click.Path(), help="Output file")
def gauntlet_rules(persona: str, fmt: str, output: str | None):
    """Show rules for reviewer personas.

    Use this to see what rules are loaded for each persona,
    or export them for use in prompts.

    \b
    Examples:
      buildlog gauntlet rules                        # All rules (YAML)
      buildlog gauntlet rules -p security_karen      # Single persona
      buildlog gauntlet rules --format json -o rules.json
      buildlog gauntlet rules --format markdown      # For docs
    """
    import json as json_module

    from buildlog.seeds import load_all_seeds

    # Find seeds directory: repo-root .buildlog first, then buildlog/.buildlog.
    seeds_dir = Path(".buildlog") / "seeds"
    if not seeds_dir.exists():
        seeds_dir = Path("buildlog") / ".buildlog" / "seeds"

    seeds = load_all_seeds(seeds_dir)

    if not seeds:
        click.echo("No seed files found.", err=True)
        click.echo("Initialize with: buildlog init", err=True)
        raise SystemExit(1)

    # Narrow to a single persona unless "all" was requested.
    if persona != "all":
        if persona not in seeds:
            available = ", ".join(seeds.keys())
            click.echo(f"Unknown persona: {persona}", err=True)
            click.echo(f"Available: {available}", err=True)
            raise SystemExit(1)
        seeds = {persona: seeds[persona]}

    if fmt == "json":
        formatted = json_module.dumps(
            _gauntlet_seeds_to_data(seeds, include_references=True), indent=2
        )
    elif fmt == "markdown":
        formatted = _gauntlet_seeds_to_markdown(seeds)
    else:  # yaml
        # Imported lazily so the other formats work without PyYAML loaded.
        import yaml as yaml_module

        # The YAML export keeps its existing shape (no "references" key).
        formatted = yaml_module.dump(
            _gauntlet_seeds_to_data(seeds, include_references=False),
            default_flow_style=False,
            sort_keys=False,
        )

    # Write to the requested file, or print to stdout.
    if output:
        output_path = Path(output)
        output_path.write_text(formatted, encoding="utf-8")
        total = sum(len(sf.rules) for sf in seeds.values())
        click.echo(f"Wrote {total} rules to {output_path}")
    else:
        click.echo(formatted)
1097
+
1098
+
1099
# NOTE: the docstring doubles as click's `--help` text; the "\b" line is
# click's no-rewrap marker (a literal backspace, so keep the docstring
# non-raw) and keeps each example command on its own line.
@gauntlet.command("prompt")
@click.argument("target", type=click.Path(exists=True))
@click.option(
    "--persona",
    "-p",
    multiple=True,
    help="Personas to include (default: all)",
)
@click.option("--output", "-o", type=click.Path(), help="Output file")
def gauntlet_prompt(target: str, persona: tuple[str, ...], output: str | None):
    """Generate a review prompt for the gauntlet.

    Creates a prompt with rules and target code that can be
    used with Claude or another LLM to run a review.

    \b
    Examples:
      buildlog gauntlet prompt src/
      buildlog gauntlet prompt src/api.py -p security_karen
      buildlog gauntlet prompt . -o review_prompt.md
    """
    from buildlog.seeds import load_all_seeds

    # Find seeds directory: repo-root .buildlog first, then buildlog/.buildlog.
    seeds_dir = Path(".buildlog") / "seeds"
    if not seeds_dir.exists():
        seeds_dir = Path("buildlog") / ".buildlog" / "seeds"

    seeds = load_all_seeds(seeds_dir)

    if not seeds:
        click.echo("No seed files found.", err=True)
        raise SystemExit(1)

    # Filter personas. Unknown names are reported instead of being dropped
    # silently, so a typo cannot quietly remove a reviewer from the prompt.
    if persona:
        unknown = [p for p in persona if p not in seeds]
        selected = {k: v for k, v in seeds.items() if k in persona}
        if not selected:
            click.echo(f"No matching personas: {', '.join(persona)}", err=True)
            click.echo(f"Available: {', '.join(seeds)}", err=True)
            raise SystemExit(1)
        if unknown:
            click.echo(
                f"Warning: ignoring unknown personas: {', '.join(unknown)}",
                err=True,
            )
        seeds = selected

    # Build the prompt: header, target, then one section per persona.
    target_path = Path(target)
    lines = [
        "# Review Gauntlet Prompt\n",
        "You are running the Review Gauntlet. Apply these rules ruthlessly.\n",
        "## Target\n",
        f"Review: `{target_path}`\n",
        "## Reviewers and Rules\n",
    ]

    for name, sf in seeds.items():
        persona_name = name.replace("_", " ").title()
        lines.append(f"### {persona_name}\n")
        for r in sf.rules:
            lines.append(f"- **{r.rule}**")
            if r.antipattern:
                # Two-space indent makes this a nested bullet under the rule.
                lines.append(f"  - Antipattern: {r.antipattern}")
        lines.append("")

    lines.extend(
        [
            "## Output Format\n",
            "For each issue found, output:\n",
            "```json",
            "{",
            ' "reviewer": "<persona>",',
            ' "severity": "critical|major|minor|nitpick",',
            ' "category": "<category>",',
            ' "location": "<file:line>",',
            ' "description": "<what is wrong>",',
            ' "rule_learned": "<generalizable rule>"',
            "}",
            "```\n",
            "## Instructions\n",
            "1. Read the target code thoroughly",
            "2. Apply each rule from each reviewer",
            "3. Report ALL violations found",
            "4. Be ruthless - this is the gauntlet",
            "",
        ]
    )

    formatted = "\n".join(lines)

    # Write to the requested file, or print to stdout.
    if output:
        output_path = Path(output)
        output_path.write_text(formatted, encoding="utf-8")
        click.echo(f"Wrote prompt to {output_path}")
    else:
        click.echo(formatted)
1190
+
1191
+
1192
# NOTE: the docstring doubles as click's `--help` text; the "\b" line is
# click's no-rewrap marker (a literal backspace, so keep the docstring
# non-raw) and keeps each example command on its own line.
@gauntlet.command("learn")
@click.argument("issues_file", type=click.Path(exists=True))
@click.option("--source", "-s", help="Source identifier (e.g., 'gauntlet:PR#42')")
@click.option("--json", "output_json", is_flag=True, help="Output as JSON")
def gauntlet_learn(issues_file: str, source: str | None, output_json: bool):
    """Persist learnings from a gauntlet review.

    Takes a JSON file of issues (in the gauntlet output format)
    and calls learn_from_review to persist them.

    \b
    Examples:
      buildlog gauntlet learn review_issues.json
      buildlog gauntlet learn issues.json --source "gauntlet:2026-01-22"
    """
    import json as json_module
    from dataclasses import asdict

    from buildlog.core import learn_from_review

    buildlog_dir = Path("buildlog")

    if not buildlog_dir.exists():
        click.echo("No buildlog/ directory found. Run 'buildlog init' first.", err=True)
        raise SystemExit(1)

    # Load issues. JSON is read as UTF-8 explicitly rather than with the
    # platform default encoding, and read failures are reported as a clean
    # CLI error instead of a traceback.
    try:
        with open(issues_file, encoding="utf-8") as f:
            data = json_module.load(f)
    except json_module.JSONDecodeError as e:
        click.echo(f"Invalid JSON: {e}", err=True)
        raise SystemExit(1) from None
    except (OSError, UnicodeDecodeError) as e:
        click.echo(f"Could not read {issues_file}: {e}", err=True)
        raise SystemExit(1) from None

    # Accept a bare list, or a dict wrapping the list under "all_issues"
    # (checked first) or "issues".
    if isinstance(data, list):
        issues = data
    elif isinstance(data, dict) and "all_issues" in data:
        issues = data["all_issues"]
    elif isinstance(data, dict) and "issues" in data:
        issues = data["issues"]
    else:
        click.echo(
            "Expected list of issues or dict with 'issues'/'all_issues'", err=True
        )
        raise SystemExit(1)

    if not issues:
        click.echo("No issues found in file.", err=True)
        raise SystemExit(1)

    # A wrapped value that is not a list (e.g. a string or a dict) would
    # otherwise be handed to learn_from_review as if it were a list of issues.
    if not isinstance(issues, list):
        click.echo(
            "Expected list of issues or dict with 'issues'/'all_issues'", err=True
        )
        raise SystemExit(1)

    # Learn from review
    result = learn_from_review(buildlog_dir, issues, source=source or "gauntlet")

    if output_json:
        click.echo(json_module.dumps(asdict(result), indent=2))
    else:
        click.echo(f"✓ {result.message}")
        click.echo(f" New learnings: {result.new_learnings}")
        click.echo(f" Reinforced: {result.reinforced_learnings}")
        click.echo(f" Total processed: {result.total_issues_processed}")
1253
+
1254
+
879
1255
# Run the top-level click group when this module is executed as a script
# (no effect when the module is merely imported).
if __name__ == "__main__":
    main()
@@ -0,0 +1,74 @@
1
+ """Seed Engine - Formalized pipeline for creating reviewer personas.
2
+
3
+ The seed engine abstracts the 4-step process for bootstrapping
4
+ defensible reviewer personas from authoritative domain sources:
5
+
6
+ 1. SOURCE IDENTIFICATION - Define authoritative sources
7
+ 2. RULE EXTRACTION - Extract candidate rules with defensibility fields
8
+ 3. CATEGORIZATION - Map rules to persona concern categories
9
+ 4. SEED GENERATION - Output validated YAML seed file
10
+
11
+ Usage:
12
+ from buildlog.seed_engine import Pipeline, Source, SourceType
13
+
14
+ # Define sources
15
+ sources = [
16
+ Source(
17
+ name="OWASP Top 10",
18
+ url="https://owasp.org/Top10/",
19
+ source_type=SourceType.REFERENCE_DOC,
20
+ domain="security",
21
+ )
22
+ ]
23
+
24
+ # Run pipeline
25
+ pipeline = Pipeline(persona="security_karen")
26
+ seed_file = pipeline.run(sources)
27
+ """
28
+
29
from buildlog.seed_engine.categorizers import (
    Categorizer,
    CategoryMapping,
    MappingCategorizer,
    TagBasedCategorizer,
)
from buildlog.seed_engine.extractors import ManualExtractor, RuleExtractor
from buildlog.seed_engine.generators import SeedGenerator
from buildlog.seed_engine.models import (
    CandidateRule,
    CategorizedRule,
    Source,
    SourceType,
)
from buildlog.seed_engine.pipeline import Pipeline
from buildlog.seed_engine.sources import (
    FetchStatus,
    SourceEntry,
    SourceFetcher,
    SourceManifest,
    url_to_cache_filename,
)
50
+
51
# Public API of the seed_engine package, grouped by pipeline stage.
__all__ = [
    # Models
    "Source",
    "SourceType",
    "CandidateRule",
    "CategorizedRule",
    # Pipeline
    "Pipeline",
    # Extractors
    "RuleExtractor",
    "ManualExtractor",
    # Categorizers
    "Categorizer",
    "TagBasedCategorizer",
    # MappingCategorizer is listed as a Categorizer implementation in
    # categorizers.py, so it is exported alongside TagBasedCategorizer.
    "MappingCategorizer",
    "CategoryMapping",
    # Generators
    "SeedGenerator",
    # Sources
    "FetchStatus",
    "SourceEntry",
    "SourceManifest",
    "SourceFetcher",
    "url_to_cache_filename",
]
@@ -0,0 +1,145 @@
1
+ """Rule categorizers for Step 3 of the seed engine pipeline.
2
+
3
+ Categorizers take candidate rules and assign final categories and tags.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from abc import ABC, abstractmethod
9
+ from collections.abc import Callable
10
+ from dataclasses import dataclass
11
+
12
+ from buildlog.seed_engine.models import CandidateRule, CategorizedRule
13
+
14
+
15
class Categorizer(ABC):
    """Abstract base class for the rule-categorization step.

    Concrete subclasses must override :meth:`categorize`; the base class
    itself cannot be instantiated.

    Implementations:
        - TagBasedCategorizer: Category from tags/keywords
        - MappingCategorizer: Explicit source→category mapping
    """

    @abstractmethod
    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Assign a category and the final tags to ``rule``.

        Args:
            rule: The candidate rule to categorize.

        Returns:
            Categorized rule ready for seed generation.
        """
34
+
35
+
36
@dataclass
class CategoryMapping:
    """Mapping from keywords/tags to category.

    TagBasedCategorizer sorts its mappings by ``priority`` (highest first)
    and assigns the ``category`` of the first mapping with a matching keyword.
    """

    # Category assigned to a rule when one of the keywords matches.
    category: str
    # If any of these appear in tags/rule, assign this category.
    keywords: list[str]
    # Higher priority wins on conflicts.
    priority: int = 0
43
+
44
+
45
class TagBasedCategorizer(Categorizer):
    """Categorize rules based on their tags and keywords.

    A rule receives the category of the highest-priority mapping that has a
    keyword matching one of the rule's normalized tags or occurring as a
    substring of the rule text; otherwise it receives ``default_category``.

    Usage:
        categorizer = TagBasedCategorizer(
            default_category="testing",
            mappings=[
                CategoryMapping("coverage", ["coverage", "untested"]),
                CategoryMapping("isolation", ["flaky", "order", "hermetic"]),
                CategoryMapping("assertions", ["assert", "expect", "verify"]),
            ],
            tag_normalizer=lambda t: t.lower().replace("-", "_"),
        )

        categorized = categorizer.categorize(candidate_rule)
    """

    def __init__(
        self,
        default_category: str,
        mappings: list[CategoryMapping] | None = None,
        tag_normalizer: Callable[[str], str] | None = None,
        additional_tags: list[str] | None = None,
    ) -> None:
        """Configure the categorizer.

        Args:
            default_category: Category used when no mapping matches.
            mappings: Keyword→category mappings; checked from highest to
                lowest ``priority``.
            tag_normalizer: Applied to every raw tag (default: lowercase).
            additional_tags: Tags appended to every categorized rule.
        """
        self.default_category = default_category
        # sorted() is stable, so mappings with equal priority keep the order
        # in which the caller listed them.
        self.mappings = sorted(mappings or [], key=lambda m: m.priority, reverse=True)
        self.tag_normalizer = tag_normalizer or (lambda t: t.lower())
        # Copy so later changes to the caller's list do not leak in here.
        self.additional_tags = list(additional_tags or [])

    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Assign category based on tag matching.

        Args:
            rule: The candidate rule to categorize.

        Returns:
            The rule with its category and the de-duplicated list of
            normalized tags plus ``additional_tags``, in first-seen order.
        """
        normalized_tags = [self.tag_normalizer(t) for t in rule.raw_tags]
        tag_set = set(normalized_tags)

        # Keywords are also searched for in the rule text itself.
        rule_text_lower = rule.rule.lower()

        # Mappings are pre-sorted by priority, so the first hit wins.
        category = self.default_category
        for mapping in self.mappings:
            if any(
                self._keyword_matches(keyword, tag_set, rule_text_lower)
                for keyword in mapping.keywords
            ):
                category = mapping.category
                break

        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would give a tag order that can change between interpreter runs.
        final_tags = list(dict.fromkeys([*normalized_tags, *self.additional_tags]))

        return CategorizedRule.from_candidate(
            candidate=rule,
            category=category,
            tags=final_tags,
        )

    def _keyword_matches(
        self, keyword: str, tag_set: set[str], rule_text_lower: str
    ) -> bool:
        """Return True if ``keyword`` matches a normalized tag or the rule text."""
        keyword_lower = keyword.lower()
        return (
            keyword_lower in tag_set
            # Tags were run through tag_normalizer, so compare the keyword in
            # the same normalized form too (e.g. "access-control" against a
            # tag that was normalized to "access_control").
            or self.tag_normalizer(keyword) in tag_set
            or keyword_lower in rule_text_lower
        )
102
+
103
+
104
class MappingCategorizer(Categorizer):
    """Categorize rules via explicit source→category mapping.

    Useful when sources map directly to categories
    (e.g., OWASP A03 → "injection"). When several configured prefixes match
    a rule's source URL, the longest (most specific) prefix wins.

    Usage:
        categorizer = MappingCategorizer(
            source_category_map={
                "https://owasp.org/Top10/A03": "injection",
                "https://owasp.org/Top10/A01": "access-control",
            },
            default_category="security",
        )
    """

    def __init__(
        self,
        source_category_map: dict[str, str],
        default_category: str,
        tag_transform: Callable[[list[str]], list[str]] | None = None,
    ) -> None:
        """Configure the categorizer.

        Args:
            source_category_map: Source URL prefix → category.
            default_category: Category used when no prefix matches.
            tag_transform: Optional function applied to the rule's raw tags
                to produce the final tags (default: tags are kept as-is).
        """
        self.source_category_map = source_category_map
        self.default_category = default_category
        self.tag_transform = tag_transform or (lambda tags: tags)

    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Assign category based on source URL.

        Args:
            rule: The candidate rule to categorize.

        Returns:
            The rule with the category of the longest matching URL prefix
            (or ``default_category``) and its transformed tags.
        """
        source_url = rule.source.url
        matching_prefixes = [
            prefix for prefix in self.source_category_map if source_url.startswith(prefix)
        ]
        if matching_prefixes:
            # Prefer the most specific prefix so that a broad entry such as
            # ".../Top10/" cannot shadow ".../Top10/A03" just because it was
            # inserted into the dict first.
            category = self.source_category_map[max(matching_prefixes, key=len)]
        else:
            category = self.default_category

        # Copy the result: the default transform returns its argument, and the
        # categorized rule should not share a list object with rule.raw_tags.
        final_tags = list(self.tag_transform(rule.raw_tags))

        return CategorizedRule.from_candidate(
            candidate=rule,
            category=category,
            tags=final_tags,
        )