buildlog 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. buildlog/cli.py +799 -3
  2. buildlog/core/__init__.py +34 -0
  3. buildlog/core/operations.py +925 -0
  4. buildlog/mcp/server.py +16 -0
  5. buildlog/mcp/tools.py +266 -1
  6. buildlog/seed_engine/__init__.py +74 -0
  7. buildlog/seed_engine/categorizers.py +145 -0
  8. buildlog/seed_engine/extractors.py +148 -0
  9. buildlog/seed_engine/generators.py +144 -0
  10. buildlog/seed_engine/models.py +113 -0
  11. buildlog/seed_engine/pipeline.py +202 -0
  12. buildlog/seed_engine/sources.py +362 -0
  13. buildlog/seeds.py +211 -0
  14. buildlog/skills.py +26 -3
  15. buildlog-0.6.0.dist-info/METADATA +490 -0
  16. buildlog-0.6.0.dist-info/RECORD +38 -0
  17. buildlog-0.4.0.dist-info/METADATA +0 -894
  18. buildlog-0.4.0.dist-info/RECORD +0 -30
  19. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/copier.yml +0 -0
  20. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/post_gen.py +0 -0
  21. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  22. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  23. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  24. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  25. {buildlog-0.4.0.data → buildlog-0.6.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  26. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/WHEEL +0 -0
  27. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/entry_points.txt +0 -0
  28. {buildlog-0.4.0.dist-info → buildlog-0.6.0.dist-info}/licenses/LICENSE +0 -0
buildlog/mcp/server.py CHANGED
@@ -6,9 +6,16 @@ from mcp.server.fastmcp import FastMCP
6
6
 
7
7
  from buildlog.mcp.tools import (
8
8
  buildlog_diff,
9
+ buildlog_end_session,
10
+ buildlog_experiment_report,
9
11
  buildlog_learn_from_review,
12
+ buildlog_log_mistake,
13
+ buildlog_log_reward,
10
14
  buildlog_promote,
11
15
  buildlog_reject,
16
+ buildlog_rewards,
17
+ buildlog_session_metrics,
18
+ buildlog_start_session,
12
19
  buildlog_status,
13
20
  )
14
21
 
@@ -20,6 +27,15 @@ mcp.tool()(buildlog_promote)
20
27
  mcp.tool()(buildlog_reject)
21
28
  mcp.tool()(buildlog_diff)
22
29
  mcp.tool()(buildlog_learn_from_review)
30
+ mcp.tool()(buildlog_log_reward)
31
+ mcp.tool()(buildlog_rewards)
32
+
33
+ # Session tracking tools (experiment infrastructure)
34
+ mcp.tool()(buildlog_start_session)
35
+ mcp.tool()(buildlog_end_session)
36
+ mcp.tool()(buildlog_log_mistake)
37
+ mcp.tool()(buildlog_session_metrics)
38
+ mcp.tool()(buildlog_experiment_report)
23
39
 
24
40
 
25
41
  def main() -> None:
buildlog/mcp/tools.py CHANGED
@@ -9,7 +9,20 @@ from dataclasses import asdict
9
9
  from pathlib import Path
10
10
  from typing import Literal
11
11
 
12
- from buildlog.core import diff, learn_from_review, promote, reject, status
12
+ from buildlog.core import (
13
+ diff,
14
+ end_session,
15
+ get_experiment_report,
16
+ get_rewards,
17
+ get_session_metrics,
18
+ learn_from_review,
19
+ log_mistake,
20
+ log_reward,
21
+ promote,
22
+ reject,
23
+ start_session,
24
+ status,
25
+ )
13
26
 
14
27
 
15
28
  def _validate_skill_ids(skill_ids: list[str]) -> list[str]:
@@ -140,3 +153,255 @@ def buildlog_learn_from_review(
140
153
  """
141
154
  result = learn_from_review(Path(buildlog_dir), issues, source)
142
155
  return asdict(result)
156
+
157
+
158
+ def buildlog_log_reward(
159
+ outcome: str,
160
+ rules_active: list[str] | None = None,
161
+ revision_distance: float | None = None,
162
+ error_class: str | None = None,
163
+ notes: str | None = None,
164
+ buildlog_dir: str = "buildlog",
165
+ ) -> dict:
166
+ """Log a reward signal for bandit learning.
167
+
168
+ Call this after agent work to provide feedback on the outcome.
169
+ This enables learning which rules are effective in which contexts.
170
+
171
+ Args:
172
+ outcome: Type of feedback:
173
+ - "accepted": Work was accepted as-is (reward=1.0)
174
+ - "revision": Work needed changes (reward=1-distance)
175
+ - "rejected": Work was rejected entirely (reward=0.0)
176
+ rules_active: List of rule IDs that were in context during the work
177
+ revision_distance: How much correction was needed (0-1, 0=minor tweak, 1=complete redo)
178
+ error_class: Category of error if applicable (e.g., "missing_test", "validation_boundary")
179
+ notes: Optional notes about the feedback
180
+ buildlog_dir: Path to buildlog directory
181
+
182
+ Returns:
183
+ Dict with reward_id, reward_value, total_events
184
+
185
+ Example:
186
+ # Work was accepted
187
+ buildlog_log_reward(outcome="accepted", rules_active=["arch-123", "wf-456"])
188
+
189
+ # Work needed revision
190
+ buildlog_log_reward(
191
+ outcome="revision",
192
+ revision_distance=0.3,
193
+ error_class="missing_test",
194
+ notes="Forgot to test error path"
195
+ )
196
+
197
+ # Work was rejected
198
+ buildlog_log_reward(outcome="rejected", notes="Completely wrong approach")
199
+ """
200
+ # Validate outcome
201
+ if outcome not in ("accepted", "revision", "rejected"):
202
+ return {
203
+ "reward_id": "",
204
+ "reward_value": 0.0,
205
+ "total_events": 0,
206
+ "message": "",
207
+ "error": f"Invalid outcome: {outcome}. Must be 'accepted', 'revision', or 'rejected'",
208
+ }
209
+
210
+ result = log_reward(
211
+ Path(buildlog_dir),
212
+ outcome=outcome, # type: ignore[arg-type]
213
+ rules_active=rules_active,
214
+ revision_distance=revision_distance,
215
+ error_class=error_class,
216
+ notes=notes,
217
+ source="mcp",
218
+ )
219
+ return asdict(result)
220
+
221
+
222
def buildlog_rewards(
    limit: int | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Return reward events together with aggregate statistics.

    Delegates to the core ``get_rewards`` operation and flattens its
    result into a plain dict.

    Args:
        limit: Maximum number of events to return (most recent first)
        buildlog_dir: Path to buildlog directory

    Returns:
        Dict with:
            - total_events: Total count of reward events
            - accepted: Count of accepted outcomes
            - revisions: Count of revision outcomes
            - rejected: Count of rejected outcomes
            - mean_reward: Average reward value
            - events: List of recent events (limited), each as a dict

    Example:
        buildlog_rewards(limit=10)  # Get 10 most recent events with stats
    """
    summary = get_rewards(Path(buildlog_dir), limit)

    payload: dict = {
        "total_events": summary.total_events,
        "accepted": summary.accepted,
        "revisions": summary.revisions,
        "rejected": summary.rejected,
        "mean_reward": summary.mean_reward,
    }
    # Events are objects exposing to_dict(); serialize each one.
    payload["events"] = [event.to_dict() for event in summary.events]
    return payload
258
+
259
+
260
+ # -----------------------------------------------------------------------------
261
+ # Session Tracking MCP Tools (Experiment Infrastructure)
262
+ # -----------------------------------------------------------------------------
263
+
264
+
265
def buildlog_start_session(
    error_class: str | None = None,
    notes: str | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Open a new experiment session.

    Thin MCP wrapper around the core ``start_session`` operation, which
    begins tracking for a learning experiment.

    Args:
        error_class: Error class being targeted (e.g., "missing_test")
        notes: Notes about this session
        buildlog_dir: Path to buildlog directory

    Returns:
        The core result converted with ``dataclasses.asdict``; documented
        fields are session_id, error_class, rules_count, message.

    Example:
        buildlog_start_session(error_class="missing_test")
    """
    target_dir = Path(buildlog_dir)
    session = start_session(target_dir, error_class=error_class, notes=notes)
    return asdict(session)
292
+
293
+
294
def buildlog_end_session(
    entry_file: str | None = None,
    notes: str | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Close the current experiment session.

    Thin MCP wrapper around the core ``end_session`` operation, which
    finalizes the session and reports metrics such as:
        - Total mistakes logged
        - Repeated mistakes (from prior sessions)
        - Rules added during session

    Args:
        entry_file: Corresponding buildlog entry file, if any
        notes: Additional notes to append
        buildlog_dir: Path to buildlog directory

    Returns:
        The core result converted with ``dataclasses.asdict``; documented
        fields are session_id, duration_minutes, mistakes_logged,
        repeated_mistakes, rules_at_start, rules_at_end, message.

    Example:
        buildlog_end_session(entry_file="2026-01-21.md")
    """
    target_dir = Path(buildlog_dir)
    closed = end_session(target_dir, entry_file=entry_file, notes=notes)
    return asdict(closed)
324
+
325
+
326
def buildlog_log_mistake(
    error_class: str,
    description: str,
    corrected_by_rule: str | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Record a mistake made during the current session.

    Thin MCP wrapper around the core ``log_mistake`` operation, which
    stores the mistake and reports whether it repeats one from an
    earlier session, so repeated-mistake rates can be measured.

    Args:
        error_class: Category of error (e.g., "missing_test")
        description: Description of the mistake
        corrected_by_rule: Rule ID that should have prevented this
        buildlog_dir: Path to buildlog directory

    Returns:
        The core result converted with ``dataclasses.asdict``; documented
        fields are mistake_id, session_id, was_repeat, similar_prior,
        message.

    Example:
        buildlog_log_mistake(
            error_class="missing_test",
            description="Forgot to add unit tests for new helper function"
        )
    """
    target_dir = Path(buildlog_dir)
    logged = log_mistake(
        target_dir,
        error_class=error_class,
        description=description,
        corrected_by_rule=corrected_by_rule,
    )
    return asdict(logged)
359
+
360
+
361
def buildlog_session_metrics(
    session_id: str | None = None,
    buildlog_dir: str = "buildlog",
) -> dict:
    """Fetch metrics for one session, or aggregated over all sessions.

    Thin MCP wrapper around the core ``get_session_metrics`` operation.

    Args:
        session_id: Specific session ID, or None for aggregate metrics
        buildlog_dir: Path to buildlog directory

    Returns:
        The core result converted with ``dataclasses.asdict``; documented
        fields are session_id, total_mistakes, repeated_mistakes,
        repeated_mistake_rate, rules_at_start, rules_at_end, rules_added.

    Example:
        buildlog_session_metrics()  # Aggregate metrics
        buildlog_session_metrics(session_id="session-20260121-140000")
    """
    target_dir = Path(buildlog_dir)
    metrics = get_session_metrics(target_dir, session_id=session_id)
    return asdict(metrics)
386
+
387
+
388
def buildlog_experiment_report(
    buildlog_dir: str = "buildlog",
) -> dict:
    """Produce the full experiment report.

    Passes the buildlog directory to the core ``get_experiment_report``
    operation and returns its result unchanged.

    Args:
        buildlog_dir: Path to buildlog directory

    Returns:
        Dict with:
            - summary: Overall statistics
            - sessions: Per-session breakdown
            - error_classes: Breakdown by error class

    Example:
        buildlog_experiment_report()
    """
    target_dir = Path(buildlog_dir)
    report = get_experiment_report(target_dir)
    return report
@@ -0,0 +1,74 @@
1
+ """Seed Engine - Formalized pipeline for creating reviewer personas.
2
+
3
+ The seed engine abstracts the 4-step process for bootstrapping
4
+ defensible reviewer personas from authoritative domain sources:
5
+
6
+ 1. SOURCE IDENTIFICATION - Define authoritative sources
7
+ 2. RULE EXTRACTION - Extract candidate rules with defensibility fields
8
+ 3. CATEGORIZATION - Map rules to persona concern categories
9
+ 4. SEED GENERATION - Output validated YAML seed file
10
+
11
+ Usage:
12
+ from buildlog.seed_engine import Pipeline, Source, SourceType
13
+
14
+ # Define sources
15
+ sources = [
16
+ Source(
17
+ name="OWASP Top 10",
18
+ url="https://owasp.org/Top10/",
19
+ source_type=SourceType.REFERENCE_DOC,
20
+ domain="security",
21
+ )
22
+ ]
23
+
24
+ # Run pipeline
25
+ pipeline = Pipeline(persona="security_karen")
26
+ seed_file = pipeline.run(sources)
27
+ """
28
+
29
from buildlog.seed_engine.categorizers import (
    Categorizer,
    CategoryMapping,
    MappingCategorizer,
    TagBasedCategorizer,
)
from buildlog.seed_engine.extractors import (
    FunctionExtractor,
    ManualExtractor,
    RuleExtractor,
)
from buildlog.seed_engine.generators import SeedGenerator
from buildlog.seed_engine.models import (
    CandidateRule,
    CategorizedRule,
    Source,
    SourceType,
)
from buildlog.seed_engine.pipeline import Pipeline
from buildlog.seed_engine.sources import (
    FetchStatus,
    SourceEntry,
    SourceFetcher,
    SourceManifest,
    url_to_cache_filename,
)

# Public API of the seed engine. MappingCategorizer and FunctionExtractor are
# re-exported alongside their sibling implementations so every concrete
# categorizer/extractor is reachable from the package root.
__all__ = [
    # Models
    "Source",
    "SourceType",
    "CandidateRule",
    "CategorizedRule",
    # Pipeline
    "Pipeline",
    # Extractors
    "RuleExtractor",
    "ManualExtractor",
    "FunctionExtractor",
    # Categorizers
    "Categorizer",
    "TagBasedCategorizer",
    "MappingCategorizer",
    "CategoryMapping",
    # Generators
    "SeedGenerator",
    # Sources
    "FetchStatus",
    "SourceEntry",
    "SourceManifest",
    "SourceFetcher",
    "url_to_cache_filename",
]
@@ -0,0 +1,145 @@
1
+ """Rule categorizers for Step 3 of the seed engine pipeline.
2
+
3
+ Categorizers take candidate rules and assign final categories and tags.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from abc import ABC, abstractmethod
9
+ from collections.abc import Callable
10
+ from dataclasses import dataclass
11
+
12
+ from buildlog.seed_engine.models import CandidateRule, CategorizedRule
13
+
14
+
15
class Categorizer(ABC):
    """Abstract base class for rule categorizers.

    Concrete subclasses must implement :meth:`categorize`.

    Implementations:
        - TagBasedCategorizer: Category from tags/keywords
        - MappingCategorizer: Explicit source→category mapping
    """

    @abstractmethod
    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Turn a candidate rule into a categorized rule.

        Args:
            rule: The candidate rule to categorize.

        Returns:
            Categorized rule (category and final tags assigned), ready
            for seed generation.
        """
34
+
35
+
36
@dataclass
class CategoryMapping:
    """Associates a list of trigger keywords with one category."""

    # Category assigned when this mapping matches.
    category: str
    # If any of these appear in tags/rule, assign this category.
    keywords: list[str]
    # Higher priority wins on conflicts; defaults to 0.
    priority: int = 0
43
+
44
+
45
class TagBasedCategorizer(Categorizer):
    """Categorize rules based on their tags and keywords.

    Mappings are tried from highest to lowest priority; the first mapping
    with a keyword that matches a normalized tag, or that occurs as a
    substring of the lower-cased rule text, decides the category. When no
    mapping matches, ``default_category`` is used.

    Usage:
        categorizer = TagBasedCategorizer(
            default_category="testing",
            mappings=[
                CategoryMapping("coverage", ["coverage", "untested"]),
                CategoryMapping("isolation", ["flaky", "order", "hermetic"]),
                CategoryMapping("assertions", ["assert", "expect", "verify"]),
            ],
            tag_normalizer=lambda t: t.lower().replace("-", "_"),
        )

        categorized = categorizer.categorize(candidate_rule)
    """

    def __init__(
        self,
        default_category: str,
        mappings: list[CategoryMapping] | None = None,
        tag_normalizer: Callable[[str], str] | None = None,
        additional_tags: list[str] | None = None,
    ) -> None:
        """Configure the categorizer.

        Args:
            default_category: Category used when no mapping matches.
            mappings: Keyword→category mappings; sorted here by
                descending priority.
            tag_normalizer: Applied to every raw tag before matching;
                defaults to lower-casing.
            additional_tags: Tags appended to every categorized rule.
        """
        self.default_category = default_category
        # sorted() is stable, so mappings of equal priority keep caller order.
        self.mappings = sorted(mappings or [], key=lambda m: m.priority, reverse=True)
        self.tag_normalizer = tag_normalizer or (lambda t: t.lower())
        self.additional_tags = additional_tags or []

    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Assign category based on tag matching.

        Args:
            rule: Candidate rule; its ``raw_tags`` and ``rule`` text are read.

        Returns:
            The result of ``CategorizedRule.from_candidate`` with the
            chosen category and the de-duplicated tag list (normalized
            tags first, then ``additional_tags``, in first-seen order).
        """
        # Normalize tags
        normalized_tags = [self.tag_normalizer(t) for t in rule.raw_tags]
        tag_lookup = set(normalized_tags)

        # Also check rule text for keywords
        rule_text_lower = rule.rule.lower()

        # Find matching category (mappings are already in priority order)
        category = self.default_category
        for mapping in self.mappings:
            if any(
                self._keyword_matches(keyword, tag_lookup, rule_text_lower)
                for keyword in mapping.keywords
            ):
                category = mapping.category
                break

        # Build final tags. dict.fromkeys de-duplicates while keeping
        # first-seen order, so output is identical from run to run
        # (set iteration order of strings varies with hash randomization).
        final_tags = list(dict.fromkeys(normalized_tags + self.additional_tags))

        return CategorizedRule.from_candidate(
            candidate=rule,
            category=category,
            tags=final_tags,
        )

    def _keyword_matches(
        self,
        keyword: str,
        tag_lookup: set[str],
        rule_text_lower: str,
    ) -> bool:
        """Return True if ``keyword`` hits a normalized tag or the rule text."""
        keyword_lower = keyword.lower()
        if keyword_lower in tag_lookup or keyword_lower in rule_text_lower:
            return True
        # Tags went through tag_normalizer, so compare the keyword in the same
        # normalized form as well; otherwise a keyword such as "access-control"
        # can never match a tag that was normalized to "access_control".
        return self.tag_normalizer(keyword) in tag_lookup
102
+
103
+
104
class MappingCategorizer(Categorizer):
    """Categorize rules via explicit source→category mapping.

    Useful when sources map directly to categories
    (e.g., OWASP A03 → "injection").

    Prefixes are tested in the mapping's insertion order and the first
    prefix that the rule's source URL starts with wins, so list more
    specific prefixes before more general ones.

    Usage:
        categorizer = MappingCategorizer(
            source_category_map={
                "https://owasp.org/Top10/A03": "injection",
                "https://owasp.org/Top10/A01": "access-control",
            },
            default_category="security",
        )
    """

    def __init__(
        self,
        source_category_map: dict[str, str],
        default_category: str,
        tag_transform: Callable[[list[str]], list[str]] | None = None,
    ) -> None:
        """Configure the categorizer.

        Args:
            source_category_map: URL prefix → category.
            default_category: Category used when no prefix matches.
            tag_transform: Optional function applied to a rule's raw tags
                to produce its final tags; defaults to passing the tags
                through unchanged.
        """
        self.source_category_map = source_category_map
        self.default_category = default_category
        self.tag_transform = tag_transform or (lambda tags: tags)

    def categorize(self, rule: CandidateRule) -> CategorizedRule:
        """Assign category based on source URL.

        Args:
            rule: Candidate rule; its ``source.url`` and ``raw_tags`` are read.

        Returns:
            The result of ``CategorizedRule.from_candidate`` with the
            matched (or default) category and a fresh list of tags.
        """
        # Find category by matching source URL prefix (first match wins)
        category = self.default_category
        for url_prefix, cat in self.source_category_map.items():
            if rule.source.url.startswith(url_prefix):
                category = cat
                break

        # Copy the tags: with the pass-through default the transform returns
        # rule.raw_tags itself, and the categorized rule must not share a
        # mutable list with the candidate it was built from.
        final_tags = list(self.tag_transform(rule.raw_tags))

        return CategorizedRule.from_candidate(
            candidate=rule,
            category=category,
            tags=final_tags,
        )
@@ -0,0 +1,148 @@
1
+ """Rule extractors for Step 2 of the seed engine pipeline.
2
+
3
+ Extractors take sources and produce candidate rules.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Callable
10
+
11
+ from buildlog.seed_engine.models import CandidateRule, Source
12
+
13
+
14
class RuleExtractor(ABC):
    """Abstract base class for extracting rules from sources.

    Concrete subclasses must implement :meth:`extract` and
    :meth:`validate`.

    Implementations:
        - ManualExtractor: Human-curated rules (highest quality)
        - LLMExtractor: LLM-assisted extraction (future)
        - StructuredExtractor: Parse structured docs like OWASP (future)
    """

    @abstractmethod
    def extract(self, source: Source) -> list[CandidateRule]:
        """Produce candidate rules for a source.

        Args:
            source: The source to extract rules from.

        Returns:
            List of candidate rules with defensibility fields.
        """

    @abstractmethod
    def validate(self, rule: CandidateRule) -> list[str]:
        """Check a candidate rule and report any problems found.

        Args:
            rule: The rule to validate.

        Returns:
            List of validation issues (empty if valid).
        """
46
+
47
+
48
class ManualExtractor(RuleExtractor):
    """Manual rule extraction via human curation.

    This is the gold standard—humans read the source and
    extract rules with full defensibility metadata.

    Rules are stored per source URL. Registering again for the same URL
    replaces the previously registered rules.

    Usage:
        extractor = ManualExtractor()

        # Register rules for a source
        extractor.register(
            source=google_testing_blog,
            rules=[
                CandidateRule(
                    rule="Tests must not depend on execution order",
                    context="Test suites with multiple tests",
                    antipattern="Test A sets state that Test B relies on",
                    rationale="Order-dependent tests are flaky",
                    source=google_testing_blog,
                    raw_tags=["isolation", "flaky"],
                )
            ]
        )

        # Extract returns registered rules
        rules = extractor.extract(google_testing_blog)
    """

    def __init__(self) -> None:
        # Registered rules keyed by source URL.
        self._rules_by_source: dict[str, list[CandidateRule]] = {}

    def register(self, source: Source, rules: list[CandidateRule]) -> None:
        """Register manually curated rules for a source.

        Nothing is stored unless every rule passes :meth:`validate`.

        Args:
            source: The source these rules come from.
            rules: The curated rules.

        Raises:
            ValueError: If any rule fails validation.
        """
        # Materialize first: this keeps a private copy (later changes to the
        # caller's list cannot slip unvalidated rules in) and lets a one-shot
        # iterable survive the validation pass below.
        curated = list(rules)

        # Validate all rules are complete
        for rule in curated:
            issues = self.validate(rule)
            if issues:
                raise ValueError(
                    f"Invalid rule '{rule.rule[:50]}...': {'; '.join(issues)}"
                )
        self._rules_by_source[source.url] = curated

    def extract(self, source: Source) -> list[CandidateRule]:
        """Return registered rules for this source.

        Returns:
            A new list holding the rules registered for ``source.url``
            (empty if none); mutating it does not affect the registry.
        """
        return list(self._rules_by_source.get(source.url, ()))

    def validate(self, rule: CandidateRule) -> list[str]:
        """Validate defensibility fields are populated.

        Returns:
            One message per field that is empty or whitespace-only;
            an empty list when the rule is valid.
        """
        issues = []
        if not rule.rule.strip():
            issues.append("Rule text is empty")
        if not rule.context.strip():
            issues.append("Context is required for defensibility")
        if not rule.antipattern.strip():
            issues.append("Antipattern is required for defensibility")
        if not rule.rationale.strip():
            issues.append("Rationale is required for defensibility")
        return issues
111
+
112
+
113
class FunctionExtractor(RuleExtractor):
    """Extractor that delegates to caller-supplied functions.

    Lets custom extraction logic be plugged in for sources
    with known structure (e.g., OWASP pages, API docs).

    Usage:
        def extract_from_owasp(source: Source) -> list[CandidateRule]:
            # Custom parsing logic for OWASP format
            ...

        extractor = FunctionExtractor(extract_from_owasp)
        rules = extractor.extract(owasp_source)
    """

    def __init__(
        self,
        extract_fn: Callable[[Source], list[CandidateRule]],
        validate_fn: Callable[[CandidateRule], list[str]] | None = None,
    ) -> None:
        """Store the extraction function and pick the validator.

        Args:
            extract_fn: Called by :meth:`extract` with the source.
            validate_fn: Called by :meth:`validate` with the rule; when
                omitted, the built-in completeness check is used.
        """
        self._extract_fn = extract_fn
        if validate_fn:
            self._validate_fn = validate_fn
        else:
            self._validate_fn = self._default_validate

    def extract(self, source: Source) -> list[CandidateRule]:
        """Invoke the custom extraction function on ``source``."""
        extracted = self._extract_fn(source)
        return extracted

    def validate(self, rule: CandidateRule) -> list[str]:
        """Invoke the configured validation function on ``rule``."""
        problems = self._validate_fn(rule)
        return problems

    def _default_validate(self, rule: CandidateRule) -> list[str]:
        """Fallback validator: one issue unless ``rule.is_complete()``."""
        if rule.is_complete():
            return []
        return ["Rule is missing required defensibility fields"]