buildlog 0.7.0-py3-none-any.whl → 0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +659 -48
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +2 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +284 -24
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +508 -0
  15. buildlog/mcp/server.py +10 -6
  16. buildlog/mcp/tools.py +61 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +67 -32
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/windsurf.py +95 -0
  23. buildlog/seed_engine/__init__.py +2 -0
  24. buildlog/seed_engine/llm_extractor.py +121 -0
  25. buildlog/seed_engine/pipeline.py +45 -1
  26. buildlog/skills.py +69 -6
  27. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/copier.yml +0 -4
  28. buildlog-0.9.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  29. buildlog-0.9.0.dist-info/METADATA +248 -0
  30. buildlog-0.9.0.dist-info/RECORD +55 -0
  31. buildlog-0.7.0.dist-info/METADATA +0 -544
  32. buildlog-0.7.0.dist-info/RECORD +0 -41
  33. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/post_gen.py +0 -0
  34. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  35. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  36. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  37. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  38. {buildlog-0.7.0.data → buildlog-0.9.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  39. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/WHEEL +0 -0
  40. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/entry_points.txt +0 -0
  41. {buildlog-0.7.0.dist-info → buildlog-0.9.0.dist-info}/licenses/LICENSE +0 -0
buildlog/llm.py ADDED
@@ -0,0 +1,508 @@
+ """LLM-backed rule extraction, deduplication, and scoring.
+
+ Provides a provider-agnostic interface for using LLMs to:
+ - Extract structured rules from buildlog entries
+ - Select canonical forms when deduplicating similar rules
+ - Score rules with severity/scope/applicability
+
+ Provider cascade:
+ 1. Explicit config (.buildlog/config.yml or env)
+ 2. Injected at call site (API parameter)
+ 3. Auto-detect: Ollama -> Anthropic -> None (regex fallback)
+ """
+
+ from __future__ import annotations
+
+ __all__ = [
+     "ExtractedRule",
+     "RuleScoring",
+     "LLMConfig",
+     "LLMBackend",
+     "OllamaBackend",
+     "AnthropicBackend",
+     "PROVIDERS",
+     "register_provider",
+     "get_llm_backend",
+ ]
+
+ import json
+ import logging
+ import os
+ import time
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from types import MappingProxyType
+ from typing import Protocol, runtime_checkable
+
+ logger = logging.getLogger(__name__)
+
+ # --- Data types (provider-agnostic) ---
+
+ VALID_SEVERITIES = ("critical", "major", "minor", "info")
+ VALID_SCOPES = ("global", "module", "function")
+ VALID_CATEGORIES = ("architectural", "workflow", "tool_usage", "domain_knowledge")
+
+
+ @dataclass
+ class ExtractedRule:
+     """A rule extracted from buildlog text by an LLM."""
+
+     rule: str
+     category: str  # architectural/workflow/tool_usage/domain_knowledge
+     severity: str = "info"  # critical/major/minor/info
+     scope: str = "global"  # global/module/function
+     applicability: list[str] = field(default_factory=list)
+     context: str | None = None  # when to apply
+     antipattern: str | None = None  # what violation looks like
+     rationale: str | None = None  # why it matters
+
+     def __post_init__(self) -> None:
+         if self.severity not in VALID_SEVERITIES:
+             self.severity = "info"
+         if self.scope not in VALID_SCOPES:
+             self.scope = "global"
+         if self.category not in VALID_CATEGORIES:
+             self.category = "architectural"
+
+
+ @dataclass
+ class RuleScoring:
+     """Severity/scope/applicability scoring for a rule."""
+
+     severity: str = "info"
+     scope: str = "global"
+     applicability: list[str] = field(default_factory=list)
+
+
+ # --- Provider config ---
+
+
+ @dataclass
+ class LLMConfig:
+     """Configuration for an LLM provider."""
+
+     provider: str  # "ollama", "anthropic", "openai", ...
+     model: str | None = None  # None = auto-detect or provider default
+     base_url: str | None = None  # Override endpoint
+     api_key: str | None = None  # From config or env var
+
+     def __repr__(self) -> str:
+         """Redact api_key to prevent accidental exposure in logs/tracebacks."""
+         key_display = "***" if self.api_key else "None"
+         return (
+             f"LLMConfig(provider={self.provider!r}, model={self.model!r}, "
+             f"base_url={self.base_url!r}, api_key={key_display})"
+         )
+
+     @classmethod
+     def from_buildlog_config(cls, buildlog_dir: Path) -> LLMConfig | None:
+         """Read from .buildlog/config.yml [llm] section."""
+         config_path = buildlog_dir / ".buildlog" / "config.yml"
+         if not config_path.exists():
+             return None
+
+         try:
+             import yaml
+         except ImportError:
+             logger.debug("PyYAML not available, skipping config file")
+             return None
+
+         try:
+             data = yaml.safe_load(config_path.read_text())
+         except Exception:
+             logger.warning("Failed to parse %s", config_path)
+             return None
+
+         if not isinstance(data, dict):
+             return None
+
+         llm_config = data.get("llm")
+         if not isinstance(llm_config, dict):
+             return None
+
+         provider = llm_config.get("provider")
+         if not provider:
+             return None
+
+         return cls(
+             provider=str(provider),
+             model=llm_config.get("model"),
+             base_url=llm_config.get("base_url"),
+             api_key=llm_config.get("api_key"),
+         )
+
+     @classmethod
+     def auto_detect(cls) -> LLMConfig | None:
+         """Ollama running? -> use it. ANTHROPIC_API_KEY? -> use that. Else None."""
+         # Try Ollama first (local, no API key needed)
+         if _is_ollama_available():
+             return cls(provider="ollama")
+
+         # Try Anthropic (cloud)
+         api_key = os.environ.get("ANTHROPIC_API_KEY")
+         if api_key:
+             return cls(provider="anthropic", api_key=api_key)
+
+         return None
+
+
+ def _is_ollama_available() -> bool:
+     """Check if Ollama is running and accessible."""
+     try:
+         import ollama as ollama_lib
+
+         ollama_lib.list()
+         return True
+     except Exception:
+         return False
+
+
+ # --- Interface ---
+
+
+ @runtime_checkable
+ class LLMBackend(Protocol):
+     """Protocol for LLM backends."""
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         ...
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         ...
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         ...
+
+
+ # --- Prompts ---
+
+ EXTRACT_RULES_PROMPT = """\
+ You are analyzing a buildlog entry's Improvements section. Extract actionable rules.
+
+ For each rule, return a JSON array of objects with these fields:
+ - "rule": string — the actionable rule in imperative form
+ - "category": string — one of: architectural, workflow, tool_usage, domain_knowledge
+ - "severity": string — one of: critical, major, minor, info
+ - "scope": string — one of: global, module, function
+ - "applicability": array of strings — contexts where relevant (e.g., "python", "api-design")
+ - "context": string or null — when to apply this rule
+ - "antipattern": string or null — what violation looks like
+ - "rationale": string or null — why it matters
+
+ Return ONLY a JSON array. No markdown, no explanation.
+
+ Text to analyze:
+ {text}
+ """
+
+ SELECT_CANONICAL_PROMPT = """\
+ Given these similar rules, produce the single best canonical form.
+ The canonical rule should be clear, concise, and actionable.
+
+ Similar rules:
+ {candidates}
+
+ Return ONLY the canonical rule text as a plain string. No JSON, no quotes, no explanation.
+ """
+
+ SCORE_RULE_PROMPT = """\
+ Score this rule for severity, scope, and applicability.
+
+ Rule: {rule}
+ Context: {context}
+
+ Return ONLY a JSON object with:
+ - "severity": one of: critical, major, minor, info
+ - "scope": one of: global, module, function
+ - "applicability": array of strings (contexts where relevant)
+
+ No markdown, no explanation.
+ """
+
+
+ def _parse_json_response(text: str) -> list | dict:
+     """Parse JSON from LLM response, handling markdown code blocks."""
+     text = text.strip()
+     # Strip markdown code blocks
+     if text.startswith("```"):
+         lines = text.split("\n")
+         # Remove first and last lines (``` markers)
+         lines = [ln for ln in lines[1:] if not ln.strip().startswith("```")]
+         text = "\n".join(lines)
+     return json.loads(text)
+
+
+ # --- Rate limiting ---
+
+ # Minimum seconds between API calls (per-backend instance).
+ _MIN_CALL_INTERVAL = 0.5
+
+
+ class _RateLimiter:
+     """Simple per-instance rate limiter to prevent API abuse."""
+
+     def __init__(self, min_interval: float = _MIN_CALL_INTERVAL):
+         self._min_interval = min_interval
+         self._last_call: float = 0.0
+
+     def wait(self) -> None:
+         """Block until min_interval has elapsed since last call."""
+         now = time.monotonic()
+         elapsed = now - self._last_call
+         if elapsed < self._min_interval:
+             time.sleep(self._min_interval - elapsed)
+         self._last_call = time.monotonic()
+
+
+ # --- Implementations ---
+
+
+ class OllamaBackend:
+     """LLM backend using Ollama (local)."""
+
+     def __init__(self, model: str | None = None, base_url: str | None = None):
+         self._model = model
+         self._base_url = base_url
+         self._resolved_model: str | None = None
+         self._rate_limiter = _RateLimiter()
+
+     def _get_model(self) -> str:
+         """Resolve model name, auto-detecting largest if not specified."""
+         if self._resolved_model:
+             return self._resolved_model
+
+         if self._model:
+             self._resolved_model = self._model
+             return self._resolved_model
+
+         # Auto-detect: pick largest pulled model
+         try:
+             import ollama as ollama_lib
+
+             models = ollama_lib.list()
+             if not models or not models.get("models"):
+                 raise RuntimeError(
+                     "No Ollama models found. Pull one with: ollama pull llama3.2"
+                 )
+
+             model_list = models["models"]
+             # Sort by size descending, pick largest
+             largest = max(model_list, key=lambda m: m.get("size", 0))
+             model_name: str = largest["name"]
+             self._resolved_model = model_name
+             logger.info("Auto-detected Ollama model: %s", model_name)
+             return model_name
+         except ImportError:
+             raise ImportError(
+                 "ollama package is required. Install with: pip install buildlog[ollama]"
+             )
+
+     def _chat(self, prompt: str) -> str:
+         """Send a prompt to Ollama and return the response text."""
+         self._rate_limiter.wait()
+         import ollama as ollama_lib
+
+         kwargs = {
+             "model": self._get_model(),
+             "messages": [{"role": "user", "content": prompt}],
+         }
+         if self._base_url:
+             client = ollama_lib.Client(host=self._base_url)
+             response = client.chat(**kwargs)
+         else:
+             response = ollama_lib.chat(**kwargs)
+         return response["message"]["content"]
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if not isinstance(parsed, list):
+                 parsed = [parsed]
+             return [ExtractedRule(**item) for item in parsed]
+         except Exception as e:
+             logger.warning("Ollama extraction failed: %s", e)
+             return []
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
+         prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
+         try:
+             response = self._chat(prompt)
+             return response.strip().strip('"').strip("'")
+         except Exception as e:
+             logger.warning("Ollama canonical selection failed: %s", e)
+             return min(candidates, key=len)
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if isinstance(parsed, dict):
+                 return RuleScoring(
+                     severity=parsed.get("severity", "info"),
+                     scope=parsed.get("scope", "global"),
+                     applicability=parsed.get("applicability", []),
+                 )
+         except Exception as e:
+             logger.warning("Ollama scoring failed: %s", e)
+         return RuleScoring()
+
+
+ class AnthropicBackend:
+     """LLM backend using Anthropic Claude API."""
+
+     def __init__(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+     ):
+         self._model = model or "claude-haiku-4-20250514"
+         self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
+         self._client = None
+         self._rate_limiter = _RateLimiter()
+
+     def __repr__(self) -> str:
+         """Redact API key from repr to prevent exposure in logs/tracebacks."""
+         return f"AnthropicBackend(model={self._model!r}, api_key=***)"
+
+     def _get_client(self):
+         """Lazy-load the Anthropic client."""
+         if self._client is None:
+             try:
+                 import anthropic
+             except ImportError:
+                 raise ImportError(
+                     "anthropic package is required. Install with: pip install buildlog[anthropic]"
+                 )
+             if not self._api_key:
+                 raise ValueError("ANTHROPIC_API_KEY is required for Anthropic backend")
+             self._client = anthropic.Anthropic(api_key=self._api_key)
+         return self._client
+
+     def _chat(self, prompt: str) -> str:
+         """Send a prompt to Claude and return the response text."""
+         self._rate_limiter.wait()
+         client = self._get_client()
+         response = client.messages.create(
+             model=self._model,
+             max_tokens=2048,
+             messages=[{"role": "user", "content": prompt}],
+         )
+         return response.content[0].text
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if not isinstance(parsed, list):
+                 parsed = [parsed]
+             return [ExtractedRule(**item) for item in parsed]
+         except Exception as e:
+             logger.warning("Anthropic extraction failed: %s", e)
+             return []
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
+         prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
+         try:
+             response = self._chat(prompt)
+             return response.strip().strip('"').strip("'")
+         except Exception as e:
+             logger.warning("Anthropic canonical selection failed: %s", e)
+             return min(candidates, key=len)
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if isinstance(parsed, dict):
+                 return RuleScoring(
+                     severity=parsed.get("severity", "info"),
+                     scope=parsed.get("scope", "global"),
+                     applicability=parsed.get("applicability", []),
+                 )
+         except Exception as e:
+             logger.warning("Anthropic scoring failed: %s", e)
+         return RuleScoring()
+
+
+ # --- Registry ---
+
+ _PROVIDERS: dict[str, type] = {
+     "ollama": OllamaBackend,
+     "anthropic": AnthropicBackend,
+ }
+ # Public read-only view. Use register_provider() to add entries.
+ PROVIDERS: MappingProxyType[str, type] = MappingProxyType(_PROVIDERS)
+
+
+ def register_provider(name: str, cls: type) -> None:
+     """Register a new LLM provider backend.
+
+     This is the only sanctioned way to mutate the provider registry.
+     """
+     if not isinstance(name, str) or not name.strip():
+         raise ValueError("Provider name must be a non-empty string")
+     _PROVIDERS[name] = cls
+
+
+ def get_llm_backend(
+     config: LLMConfig | None = None,
+     buildlog_dir: Path | None = None,
+ ) -> LLMBackend | None:
+     """Get an LLM backend using the provider cascade.
+
+     Resolution order:
+     1. Explicit config parameter (highest priority)
+     2. Config file (.buildlog/config.yml)
+     3. Auto-detect: Ollama -> Anthropic -> None
+
+     Returns None if no provider is available (regex fallback).
+     """
+     # 1. Explicit config
+     if config is None and buildlog_dir is not None:
+         # 2. Config file
+         config = LLMConfig.from_buildlog_config(buildlog_dir)
+
+     if config is None:
+         # 3. Auto-detect
+         config = LLMConfig.auto_detect()
+
+     if config is None:
+         logger.info("No LLM provider available, using regex fallback")
+         return None
+
+     provider_cls = _PROVIDERS.get(config.provider)
+     if provider_cls is None:
+         logger.warning("Unknown LLM provider: %s", config.provider)
+         return None
+
+     try:
+         kwargs: dict = {}
+         if config.model:
+             kwargs["model"] = config.model
+         if config.provider == "ollama" and config.base_url:
+             kwargs["base_url"] = config.base_url
+         if config.provider == "anthropic" and config.api_key:
+             kwargs["api_key"] = config.api_key
+
+         backend = provider_cls(**kwargs)
+         logger.info("Using LLM provider: %s", config.provider)
+         return backend
+     except Exception as e:
+         logger.warning("Failed to initialize %s backend: %s", config.provider, e)
+         return None
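For orientation, a minimal usage sketch of the API this new module exposes. It is not part of the diff; the entry text and provider choices are illustrative, and the commented-out MyBackend stands in for any third-party class satisfying the LLMBackend protocol.

from pathlib import Path

from buildlog.llm import LLMConfig, get_llm_backend

# Resolve a backend via the cascade: explicit config > .buildlog/config.yml > auto-detect.
backend = get_llm_backend(buildlog_dir=Path("buildlog"))

if backend is None:
    # No provider available; callers fall back to regex-based extraction.
    rules = []
else:
    rules = backend.extract_rules("## Improvements\n- Pin dependency versions in CI")
    for rule in rules:
        print(rule.rule, rule.category, rule.severity, rule.scope)

# Forcing a provider explicitly instead of relying on auto-detection:
backend = get_llm_backend(config=LLMConfig(provider="anthropic"))

# Third-party backends can be added to the registry at runtime:
# from buildlog.llm import register_provider
# register_provider("my_provider", MyBackend)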
buildlog/mcp/server.py CHANGED
@@ -5,9 +5,12 @@ from __future__ import annotations
  from mcp.server.fastmcp import FastMCP
 
  from buildlog.mcp.tools import (
+     buildlog_bandit_status,
      buildlog_diff,
-     buildlog_end_session,
+     buildlog_experiment_end,
+     buildlog_experiment_metrics,
      buildlog_experiment_report,
+     buildlog_experiment_start,
      buildlog_gauntlet_accept_risk,
      buildlog_gauntlet_issues,
      buildlog_learn_from_review,
@@ -16,8 +19,6 @@ from buildlog.mcp.tools import (
      buildlog_promote,
      buildlog_reject,
      buildlog_rewards,
-     buildlog_session_metrics,
-     buildlog_start_session,
      buildlog_status,
  )
 
@@ -33,16 +34,19 @@ mcp.tool()(buildlog_log_reward)
  mcp.tool()(buildlog_rewards)
 
  # Session tracking tools (experiment infrastructure)
- mcp.tool()(buildlog_start_session)
- mcp.tool()(buildlog_end_session)
+ mcp.tool()(buildlog_experiment_start)
+ mcp.tool()(buildlog_experiment_end)
  mcp.tool()(buildlog_log_mistake)
- mcp.tool()(buildlog_session_metrics)
+ mcp.tool()(buildlog_experiment_metrics)
  mcp.tool()(buildlog_experiment_report)
 
  # Gauntlet loop tools
  mcp.tool()(buildlog_gauntlet_issues)
  mcp.tool()(buildlog_gauntlet_accept_risk)
 
+ # Bandit tools
+ mcp.tool()(buildlog_bandit_status)
+
 
  def main() -> None:
      """Run the MCP server."""
buildlog/mcp/tools.py CHANGED
@@ -12,6 +12,7 @@ from typing import Literal
  from buildlog.core import (
      diff,
      end_session,
+     get_bandit_status,
      get_experiment_report,
      get_rewards,
      get_session_metrics,
@@ -52,17 +53,17 @@ def buildlog_status(
 
  def buildlog_promote(
      skill_ids: list[str],
-     target: Literal["claude_md", "settings_json", "skill"] = "claude_md",
+     target: str = "claude_md",
      buildlog_dir: str = "buildlog",
  ) -> dict:
      """Promote skills to your agent's rules.
 
-     Writes selected skills to CLAUDE.md, .claude/settings.json, or
-     .claude/skills/buildlog-learned/SKILL.md (Anthropic Agent Skills format).
+     Writes selected skills to agent-specific rule files.
 
      Args:
          skill_ids: List of skill IDs to promote (e.g., ["arch-b0fcb62a1e"])
-         target: Where to write rules ("claude_md", "settings_json", or "skill")
+         target: Where to write rules. One of: claude_md, settings_json,
+             skill, cursor, copilot, windsurf, continue_dev.
          buildlog_dir: Path to buildlog directory
      Returns:
 
@@ -262,36 +263,47 @@ def buildlog_rewards(
  # -----------------------------------------------------------------------------
 
 
- def buildlog_start_session(
+ def buildlog_experiment_start(
      error_class: str | None = None,
      notes: str | None = None,
+     select_k: int = 3,
      buildlog_dir: str = "buildlog",
  ) -> dict:
-     """Start a new experiment session.
+     """Start a new experiment session with Thompson Sampling rule selection.
 
-     Begins tracking for a learning experiment. Captures the current
-     set of active rules to measure learning over time.
+     Begins tracking for a learning experiment. Uses Thompson Sampling
+     to select which rules will be "active" for this session based on
+     the error class context.
+
+     The selected rules will receive feedback:
+     - Negative feedback (reward=0) when log_mistake() is called
+     - Explicit feedback when log_reward() is called
+
+     This teaches the bandit which rules are effective for which contexts.
 
      Args:
-         error_class: Error class being targeted (e.g., "missing_test")
+         error_class: Error class being targeted (e.g., "missing_test").
+             This is the CONTEXT for contextual bandits.
          notes: Notes about this session
+         select_k: Number of rules to select via Thompson Sampling
          buildlog_dir: Path to buildlog directory
 
      Returns:
-         Dict with session_id, error_class, rules_count, message
+         Dict with session_id, error_class, rules_count, selected_rules, message
 
      Example:
-         buildlog_start_session(error_class="missing_test")
+         buildlog_start_session(error_class="type-errors", select_k=5)
      """
      result = start_session(
          Path(buildlog_dir),
          error_class=error_class,
          notes=notes,
+         select_k=select_k,
      )
      return asdict(result)
 
 
- def buildlog_end_session(
+ def buildlog_experiment_end(
      entry_file: str | None = None,
      notes: str | None = None,
      buildlog_dir: str = "buildlog",
@@ -358,7 +370,7 @@ def buildlog_log_mistake(
      return asdict(result)
 
 
- def buildlog_session_metrics(
+ def buildlog_experiment_metrics(
      session_id: str | None = None,
      buildlog_dir: str = "buildlog",
  ) -> dict:
@@ -407,6 +419,42 @@ def buildlog_experiment_report(
      return get_experiment_report(Path(buildlog_dir))
 
 
+ def buildlog_bandit_status(
+     buildlog_dir: str = "buildlog",
+     context: str | None = None,
+     top_k: int = 10,
+ ) -> dict:
+     """Get Thompson Sampling bandit status and rule rankings.
+
+     Shows the bandit's learned beliefs about which rules are effective
+     for each error class context. Higher mean = bandit believes rule
+     is more effective.
+
+     The bandit uses Beta distributions to model uncertainty:
+     - High variance (wide CI) = uncertain, will explore more
+     - Low variance (narrow CI) = confident, will exploit
+
+     Args:
+         buildlog_dir: Path to buildlog directory
+         context: Specific error class to filter by (optional)
+         top_k: Number of top rules to show per context
+
+     Returns:
+         Dict with:
+         - summary: Total contexts, arms, observations
+         - top_rules: Best rules per context by expected value
+         - all_rules: Full stats if filtering by context
+
+     Example:
+         # See all bandit state
+         buildlog_bandit_status()
+
+         # See state for specific error class
+         buildlog_bandit_status(context="type-errors")
+     """
+     return get_bandit_status(Path(buildlog_dir), context, top_k)
+
+
  # -----------------------------------------------------------------------------
  # Gauntlet Loop MCP Tools
  # -----------------------------------------------------------------------------
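The buildlog_bandit_status docstring above describes the Beta-distribution bookkeeping; the actual implementation lives in buildlog/core/bandit.py, which appears in the file summary but is not shown in this section. As a standalone illustration of the Thompson Sampling idea only (rule IDs and counts are hypothetical, not buildlog's code):

import random

# Each rule (arm) carries observed (successes, failures) for one error-class context.
arms = {
    "arch-b0fcb62a1e": (4, 1),
    "flow-aaaa11112222": (1, 3),
    "tool-bbbb33334444": (0, 0),  # unseen arm: wide Beta posterior, explored often
}

def select_rules(arms: dict[str, tuple[int, int]], k: int) -> list[str]:
    """Sample each arm's Beta(successes + 1, failures + 1) posterior and keep the k highest draws."""
    draws = {
        rule: random.betavariate(successes + 1, failures + 1)
        for rule, (successes, failures) in arms.items()
    }
    return sorted(draws, key=draws.get, reverse=True)[:k]

print(select_rules(arms, k=2))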