buildlog 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +436 -44
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +2 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +265 -11
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +461 -0
  15. buildlog/mcp/server.py +6 -6
  16. buildlog/mcp/tools.py +61 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +67 -32
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/windsurf.py +95 -0
  23. buildlog/skills.py +69 -6
  24. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/copier.yml +0 -4
  25. buildlog-0.8.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  26. buildlog-0.8.0.dist-info/METADATA +151 -0
  27. buildlog-0.8.0.dist-info/RECORD +54 -0
  28. buildlog-0.7.0.dist-info/METADATA +0 -544
  29. buildlog-0.7.0.dist-info/RECORD +0 -41
  30. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/post_gen.py +0 -0
  31. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  32. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  33. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  34. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  35. {buildlog-0.7.0.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  36. {buildlog-0.7.0.dist-info → buildlog-0.8.0.dist-info}/WHEEL +0 -0
  37. {buildlog-0.7.0.dist-info → buildlog-0.8.0.dist-info}/entry_points.txt +0 -0
  38. {buildlog-0.7.0.dist-info → buildlog-0.8.0.dist-info}/licenses/LICENSE +0 -0
buildlog/llm.py ADDED
@@ -0,0 +1,461 @@
1
+ """LLM-backed rule extraction, deduplication, and scoring.
2
+
3
+ Provides a provider-agnostic interface for using LLMs to:
4
+ - Extract structured rules from buildlog entries
5
+ - Select canonical forms when deduplicating similar rules
6
+ - Score rules with severity/scope/applicability
7
+
8
+ Provider cascade:
9
+ 1. Explicit config (.buildlog/config.yml or env)
10
+ 2. Injected at call site (API parameter)
11
+ 3. Auto-detect: Ollama -> Anthropic -> None (regex fallback)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ __all__ = [
17
+ "ExtractedRule",
18
+ "RuleScoring",
19
+ "LLMConfig",
20
+ "LLMBackend",
21
+ "OllamaBackend",
22
+ "AnthropicBackend",
23
+ "PROVIDERS",
24
+ "register_provider",
25
+ "get_llm_backend",
26
+ ]
27
+
28
+ import json
29
+ import logging
30
+ import os
31
+ from dataclasses import dataclass, field
32
+ from pathlib import Path
33
+ from typing import Protocol, runtime_checkable
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ # --- Data types (provider-agnostic) ---
38
+
39
VALID_SEVERITIES = ("critical", "major", "minor", "info")
VALID_SCOPES = ("global", "module", "function")
VALID_CATEGORIES = ("architectural", "workflow", "tool_usage", "domain_knowledge")


@dataclass
class ExtractedRule:
    """A rule extracted from buildlog text by an LLM.

    Invalid enum-like fields are silently coerced to safe defaults in
    __post_init__, since these records come from untrusted LLM output.
    """

    rule: str
    category: str  # one of VALID_CATEGORIES
    severity: str = "info"  # one of VALID_SEVERITIES
    scope: str = "global"  # one of VALID_SCOPES
    applicability: list[str] = field(default_factory=list)  # e.g. ["python"]
    context: str | None = None  # when to apply
    antipattern: str | None = None  # what violation looks like
    rationale: str | None = None  # why it matters

    def __post_init__(self) -> None:
        # Coerce any out-of-vocabulary value to its default rather than
        # raising: LLMs occasionally invent labels.
        fallbacks = (
            ("severity", VALID_SEVERITIES, "info"),
            ("scope", VALID_SCOPES, "global"),
            ("category", VALID_CATEGORIES, "architectural"),
        )
        for attr, allowed, default in fallbacks:
            if getattr(self, attr) not in allowed:
                setattr(self, attr, default)
64
+
65
+
66
@dataclass
class RuleScoring:
    """Severity/scope/applicability scoring for a rule.

    Unlike ExtractedRule, values are not validated or coerced here; the
    score_rule() implementations populate these fields directly from parsed
    LLM output, falling back to a default-constructed instance on failure.
    """

    severity: str = "info"  # expected: critical/major/minor/info
    scope: str = "global"  # expected: global/module/function
    applicability: list[str] = field(default_factory=list)  # contexts, e.g. "python"
73
+
74
+
75
+ # --- Provider config ---
76
+
77
+
78
@dataclass
class LLMConfig:
    """Configuration for an LLM provider.

    Built either directly, from the project config file
    (from_buildlog_config), or by probing the environment (auto_detect).
    """

    provider: str  # "ollama", "anthropic", "openai", ...
    model: str | None = None  # None = auto-detect or provider default
    base_url: str | None = None  # endpoint override
    api_key: str | None = None  # from config file or env var

    @classmethod
    def from_buildlog_config(cls, buildlog_dir: Path) -> LLMConfig | None:
        """Read the [llm] section of <buildlog_dir>/.buildlog/config.yml.

        Returns None when the file is absent, PyYAML is not installed, the
        YAML fails to parse, or no usable "llm" mapping with a "provider"
        key is present.
        """
        path = buildlog_dir / ".buildlog" / "config.yml"
        if not path.exists():
            return None

        try:
            import yaml
        except ImportError:
            # PyYAML is optional; fall through to auto-detection without it.
            logger.debug("PyYAML not available, skipping config file")
            return None

        try:
            raw = yaml.safe_load(path.read_text())
        except Exception:
            logger.warning("Failed to parse %s", path)
            return None

        section = raw.get("llm") if isinstance(raw, dict) else None
        if not isinstance(section, dict):
            return None

        provider = section.get("provider")
        if not provider:
            return None

        return cls(
            provider=str(provider),
            model=section.get("model"),
            base_url=section.get("base_url"),
            api_key=section.get("api_key"),
        )

    @classmethod
    def auto_detect(cls) -> LLMConfig | None:
        """Probe for a usable provider: local Ollama first, then Anthropic.

        Returns None when neither is available (callers use regex fallback).
        """
        # Prefer Ollama: local, no API key required.
        if _is_ollama_available():
            return cls(provider="ollama")

        key = os.environ.get("ANTHROPIC_API_KEY")
        if key:
            return cls(provider="anthropic", api_key=key)

        return None
137
+
138
+
139
+ def _is_ollama_available() -> bool:
140
+ """Check if Ollama is running and accessible."""
141
+ try:
142
+ import ollama as ollama_lib
143
+
144
+ ollama_lib.list()
145
+ return True
146
+ except Exception:
147
+ return False
148
+
149
+
150
+ # --- Interface ---
151
+
152
+
153
@runtime_checkable
class LLMBackend(Protocol):
    """Protocol for LLM backends.

    Any object providing these three methods can be returned from
    get_llm_backend(); OllamaBackend and AnthropicBackend are the built-in
    implementations. @runtime_checkable permits isinstance() checks, which
    only verify that the method names exist, not their signatures.
    """

    def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
        """Extract structured rules from buildlog entry text."""
        ...

    def select_canonical(self, candidates: list[str]) -> str:
        """Given similar rules, produce the single best canonical form."""
        ...

    def score_rule(self, rule: str, context: str) -> RuleScoring:
        """Score a rule with severity/scope/applicability."""
        ...
168
+
169
+
170
# --- Prompts ---

# str.format template; slot: {text}. Asks the model for a bare JSON array of
# rule objects mirroring the ExtractedRule fields; responses are parsed
# leniently by _parse_json_response (markdown fences tolerated).
EXTRACT_RULES_PROMPT = """\
You are analyzing a buildlog entry's Improvements section. Extract actionable rules.

For each rule, return a JSON array of objects with these fields:
- "rule": string — the actionable rule in imperative form
- "category": string — one of: architectural, workflow, tool_usage, domain_knowledge
- "severity": string — one of: critical, major, minor, info
- "scope": string — one of: global, module, function
- "applicability": array of strings — contexts where relevant (e.g., "python", "api-design")
- "context": string or null — when to apply this rule
- "antipattern": string or null — what violation looks like
- "rationale": string or null — why it matters

Return ONLY a JSON array. No markdown, no explanation.

Text to analyze:
{text}
"""

# str.format template; slot: {candidates} (a pre-numbered list built by the
# backends). The reply is used nearly verbatim (quotes stripped), so plain
# text rather than JSON is requested.
SELECT_CANONICAL_PROMPT = """\
Given these similar rules, produce the single best canonical form.
The canonical rule should be clear, concise, and actionable.

Similar rules:
{candidates}

Return ONLY the canonical rule text as a plain string. No JSON, no quotes, no explanation.
"""

# str.format template; slots: {rule}, {context}. Expects a single JSON object
# whose keys map onto RuleScoring fields.
SCORE_RULE_PROMPT = """\
Score this rule for severity, scope, and applicability.

Rule: {rule}
Context: {context}

Return ONLY a JSON object with:
- "severity": one of: critical, major, minor, info
- "scope": one of: global, module, function
- "applicability": array of strings (contexts where relevant)

No markdown, no explanation.
"""
214
+
215
+
216
+ def _parse_json_response(text: str) -> list | dict:
217
+ """Parse JSON from LLM response, handling markdown code blocks."""
218
+ text = text.strip()
219
+ # Strip markdown code blocks
220
+ if text.startswith("```"):
221
+ lines = text.split("\n")
222
+ # Remove first and last lines (``` markers)
223
+ lines = [ln for ln in lines[1:] if not ln.strip().startswith("```")]
224
+ text = "\n".join(lines)
225
+ return json.loads(text)
226
+
227
+
228
+ # --- Implementations ---
229
+
230
+
231
class OllamaBackend:
    """LLM backend using Ollama (local).

    Satisfies the LLMBackend protocol. Each public method catches provider
    failures internally and returns a cheap non-LLM fallback, so callers
    never have to handle Ollama errors themselves.
    """

    def __init__(self, model: str | None = None, base_url: str | None = None):
        self._model = model  # explicit model name, or None to auto-detect
        self._base_url = base_url  # custom Ollama host, or None for default
        self._resolved_model: str | None = None  # cache for _get_model()

    def _get_model(self) -> str:
        """Resolve model name, auto-detecting largest if not specified.

        Raises:
            ImportError: if the ollama package is not installed.
            RuntimeError: if no models have been pulled.
        """
        if self._resolved_model:
            return self._resolved_model

        if self._model:
            self._resolved_model = self._model
            return self._resolved_model

        # Auto-detect: pick largest pulled model
        try:
            import ollama as ollama_lib
        except ImportError as e:
            # Chain the cause so the original import failure stays visible.
            raise ImportError(
                "ollama package is required. Install with: pip install buildlog[ollama]"
            ) from e

        models = ollama_lib.list()
        if not models or not models.get("models"):
            raise RuntimeError(
                "No Ollama models found. Pull one with: ollama pull llama3.2"
            )

        model_list = models["models"]
        # Heuristic: the largest model on disk is assumed the most capable.
        largest = max(model_list, key=lambda m: m.get("size", 0))
        model_name: str = largest["name"]
        self._resolved_model = model_name
        logger.info("Auto-detected Ollama model: %s", model_name)
        return model_name

    def _chat(self, prompt: str) -> str:
        """Send a single-user-message chat to Ollama; return the reply text."""
        import ollama as ollama_lib

        kwargs = {
            "model": self._get_model(),
            "messages": [{"role": "user", "content": prompt}],
        }
        if self._base_url:
            # A custom host requires an explicit Client; the module-level
            # functions always target the default local endpoint.
            client = ollama_lib.Client(host=self._base_url)
            response = client.chat(**kwargs)
        else:
            response = ollama_lib.chat(**kwargs)
        return response["message"]["content"]

    def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
        """Extract structured rules from buildlog entry text.

        Returns [] on any provider or parse failure (logged as a warning).
        """
        prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
        try:
            response = self._chat(prompt)
            parsed = _parse_json_response(response)
            if not isinstance(parsed, list):
                # Some models return a single object instead of an array.
                parsed = [parsed]
            return [ExtractedRule(**item) for item in parsed]
        except Exception as e:
            logger.warning("Ollama extraction failed: %s", e)
            return []

    def select_canonical(self, candidates: list[str]) -> str:
        """Given similar rules, produce the single best canonical form.

        Falls back to the shortest candidate when the LLM call fails;
        returns "" for an empty candidate list.
        """
        if not candidates:
            # Guard: min() on an empty list in the fallback path would raise.
            return ""
        numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
        prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
        try:
            response = self._chat(prompt)
            return response.strip().strip('"').strip("'")
        except Exception as e:
            logger.warning("Ollama canonical selection failed: %s", e)
            return min(candidates, key=len)

    def score_rule(self, rule: str, context: str) -> RuleScoring:
        """Score a rule with severity/scope/applicability.

        Returns a default RuleScoring on failure or an unexpected response
        shape (non-dict JSON).
        """
        prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
        try:
            response = self._chat(prompt)
            parsed = _parse_json_response(response)
            if isinstance(parsed, dict):
                return RuleScoring(
                    severity=parsed.get("severity", "info"),
                    scope=parsed.get("scope", "global"),
                    applicability=parsed.get("applicability", []),
                )
        except Exception as e:
            logger.warning("Ollama scoring failed: %s", e)
        return RuleScoring()
324
+
325
+
326
class AnthropicBackend:
    """LLM backend using Anthropic Claude API.

    Satisfies the LLMBackend protocol. Each public method catches provider
    failures internally and returns a cheap non-LLM fallback, so callers
    never have to handle API errors themselves.
    """

    def __init__(
        self,
        model: str | None = None,
        api_key: str | None = None,
    ):
        self._model = model or "claude-haiku-4-20250514"
        # Explicit key wins; otherwise fall back to the standard env var.
        self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self._client = None  # lazily created by _get_client()

    def _get_client(self):
        """Lazy-load the Anthropic client.

        Raises:
            ImportError: if the anthropic package is not installed.
            ValueError: if no API key is configured.
        """
        if self._client is None:
            try:
                import anthropic
            except ImportError as e:
                # Chain the cause so the original import failure stays visible.
                raise ImportError(
                    "anthropic package is required. Install with: pip install buildlog[anthropic]"
                ) from e
            if not self._api_key:
                raise ValueError("ANTHROPIC_API_KEY is required for Anthropic backend")
            self._client = anthropic.Anthropic(api_key=self._api_key)
        return self._client

    def _chat(self, prompt: str) -> str:
        """Send a prompt to Claude and return the response text."""
        client = self._get_client()
        response = client.messages.create(
            model=self._model,
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text

    def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
        """Extract structured rules from buildlog entry text.

        Returns [] on any provider or parse failure (logged as a warning).
        """
        prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
        try:
            response = self._chat(prompt)
            parsed = _parse_json_response(response)
            if not isinstance(parsed, list):
                # Some models return a single object instead of an array.
                parsed = [parsed]
            return [ExtractedRule(**item) for item in parsed]
        except Exception as e:
            logger.warning("Anthropic extraction failed: %s", e)
            return []

    def select_canonical(self, candidates: list[str]) -> str:
        """Given similar rules, produce the single best canonical form.

        Falls back to the shortest candidate when the LLM call fails;
        returns "" for an empty candidate list.
        """
        if not candidates:
            # Guard: min() on an empty list in the fallback path would raise.
            return ""
        numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
        prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
        try:
            response = self._chat(prompt)
            return response.strip().strip('"').strip("'")
        except Exception as e:
            logger.warning("Anthropic canonical selection failed: %s", e)
            return min(candidates, key=len)

    def score_rule(self, rule: str, context: str) -> RuleScoring:
        """Score a rule with severity/scope/applicability.

        Returns a default RuleScoring on failure or an unexpected response
        shape (non-dict JSON).
        """
        prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
        try:
            response = self._chat(prompt)
            parsed = _parse_json_response(response)
            if isinstance(parsed, dict):
                return RuleScoring(
                    severity=parsed.get("severity", "info"),
                    scope=parsed.get("scope", "global"),
                    applicability=parsed.get("applicability", []),
                )
        except Exception as e:
            logger.warning("Anthropic scoring failed: %s", e)
        return RuleScoring()
401
+
402
+
403
# --- Registry ---

# Maps provider name (as used by LLMConfig.provider / config.yml) to the
# backend class. Extend via register_provider() rather than mutating directly.
PROVIDERS: dict[str, type] = {
    "ollama": OllamaBackend,
    "anthropic": AnthropicBackend,
}


def register_provider(name: str, cls: type) -> None:
    """Register a new LLM provider backend.

    Args:
        name: Provider key, matched against LLMConfig.provider by
            get_llm_backend().
        cls: Backend class implementing the LLMBackend protocol; it is
            instantiated by get_llm_backend().

    Note: re-registering an existing name silently overwrites it, which lets
    tests or plugins replace the built-in backends.
    """
    PROVIDERS[name] = cls
414
+
415
+
416
def get_llm_backend(
    config: LLMConfig | None = None,
    buildlog_dir: Path | None = None,
) -> LLMBackend | None:
    """Get an LLM backend using the provider cascade.

    Resolution order:
    1. Explicit ``config`` parameter (highest priority)
    2. Config file (.buildlog/config.yml) under ``buildlog_dir``
    3. Auto-detect: Ollama -> Anthropic -> None

    Returns None when no provider can be resolved or initialized, in which
    case callers fall back to regex-based extraction.
    """
    resolved = config
    if resolved is None and buildlog_dir is not None:
        # 2. Config file
        resolved = LLMConfig.from_buildlog_config(buildlog_dir)
    if resolved is None:
        # 3. Auto-detect
        resolved = LLMConfig.auto_detect()
    if resolved is None:
        logger.info("No LLM provider available, using regex fallback")
        return None

    provider_cls = PROVIDERS.get(resolved.provider)
    if provider_cls is None:
        logger.warning("Unknown LLM provider: %s", resolved.provider)
        return None

    # Forward only the keyword arguments relevant to the chosen provider.
    init_kwargs: dict = {}
    if resolved.model:
        init_kwargs["model"] = resolved.model
    if resolved.provider == "ollama" and resolved.base_url:
        init_kwargs["base_url"] = resolved.base_url
    if resolved.provider == "anthropic" and resolved.api_key:
        init_kwargs["api_key"] = resolved.api_key

    try:
        backend = provider_cls(**init_kwargs)
        logger.info("Using LLM provider: %s", resolved.provider)
        return backend
    except Exception as e:
        logger.warning("Failed to initialize %s backend: %s", resolved.provider, e)
        return None
buildlog/mcp/server.py CHANGED
@@ -6,8 +6,10 @@ from mcp.server.fastmcp import FastMCP
6
6
 
7
7
  from buildlog.mcp.tools import (
8
8
  buildlog_diff,
9
- buildlog_end_session,
9
+ buildlog_experiment_end,
10
+ buildlog_experiment_metrics,
10
11
  buildlog_experiment_report,
12
+ buildlog_experiment_start,
11
13
  buildlog_gauntlet_accept_risk,
12
14
  buildlog_gauntlet_issues,
13
15
  buildlog_learn_from_review,
@@ -16,8 +18,6 @@ from buildlog.mcp.tools import (
16
18
  buildlog_promote,
17
19
  buildlog_reject,
18
20
  buildlog_rewards,
19
- buildlog_session_metrics,
20
- buildlog_start_session,
21
21
  buildlog_status,
22
22
  )
23
23
 
@@ -33,10 +33,10 @@ mcp.tool()(buildlog_log_reward)
33
33
  mcp.tool()(buildlog_rewards)
34
34
 
35
35
  # Session tracking tools (experiment infrastructure)
36
- mcp.tool()(buildlog_start_session)
37
- mcp.tool()(buildlog_end_session)
36
+ mcp.tool()(buildlog_experiment_start)
37
+ mcp.tool()(buildlog_experiment_end)
38
38
  mcp.tool()(buildlog_log_mistake)
39
- mcp.tool()(buildlog_session_metrics)
39
+ mcp.tool()(buildlog_experiment_metrics)
40
40
  mcp.tool()(buildlog_experiment_report)
41
41
 
42
42
  # Gauntlet loop tools
buildlog/mcp/tools.py CHANGED
@@ -12,6 +12,7 @@ from typing import Literal
12
12
  from buildlog.core import (
13
13
  diff,
14
14
  end_session,
15
+ get_bandit_status,
15
16
  get_experiment_report,
16
17
  get_rewards,
17
18
  get_session_metrics,
@@ -52,17 +53,17 @@ def buildlog_status(
52
53
 
53
54
  def buildlog_promote(
54
55
  skill_ids: list[str],
55
- target: Literal["claude_md", "settings_json", "skill"] = "claude_md",
56
+ target: str = "claude_md",
56
57
  buildlog_dir: str = "buildlog",
57
58
  ) -> dict:
58
59
  """Promote skills to your agent's rules.
59
60
 
60
- Writes selected skills to CLAUDE.md, .claude/settings.json, or
61
- .claude/skills/buildlog-learned/SKILL.md (Anthropic Agent Skills format).
61
+ Writes selected skills to agent-specific rule files.
62
62
 
63
63
  Args:
64
64
  skill_ids: List of skill IDs to promote (e.g., ["arch-b0fcb62a1e"])
65
- target: Where to write rules ("claude_md", "settings_json", or "skill")
65
+ target: Where to write rules. One of: claude_md, settings_json,
66
+ skill, cursor, copilot, windsurf, continue_dev.
66
67
  buildlog_dir: Path to buildlog directory
67
68
 
68
69
  Returns:
@@ -262,36 +263,47 @@ def buildlog_rewards(
262
263
  # -----------------------------------------------------------------------------
263
264
 
264
265
 
265
- def buildlog_start_session(
266
+ def buildlog_experiment_start(
266
267
  error_class: str | None = None,
267
268
  notes: str | None = None,
269
+ select_k: int = 3,
268
270
  buildlog_dir: str = "buildlog",
269
271
  ) -> dict:
270
- """Start a new experiment session.
272
+ """Start a new experiment session with Thompson Sampling rule selection.
271
273
 
272
- Begins tracking for a learning experiment. Captures the current
273
- set of active rules to measure learning over time.
274
+ Begins tracking for a learning experiment. Uses Thompson Sampling
275
+ to select which rules will be "active" for this session based on
276
+ the error class context.
277
+
278
+ The selected rules will receive feedback:
279
+ - Negative feedback (reward=0) when log_mistake() is called
280
+ - Explicit feedback when log_reward() is called
281
+
282
+ This teaches the bandit which rules are effective for which contexts.
274
283
 
275
284
  Args:
276
- error_class: Error class being targeted (e.g., "missing_test")
285
+ error_class: Error class being targeted (e.g., "missing_test").
286
+ This is the CONTEXT for contextual bandits.
277
287
  notes: Notes about this session
288
+ select_k: Number of rules to select via Thompson Sampling
278
289
  buildlog_dir: Path to buildlog directory
279
290
 
280
291
  Returns:
281
- Dict with session_id, error_class, rules_count, message
292
+ Dict with session_id, error_class, rules_count, selected_rules, message
282
293
 
283
294
  Example:
284
- buildlog_start_session(error_class="missing_test")
295
+ buildlog_start_session(error_class="type-errors", select_k=5)
285
296
  """
286
297
  result = start_session(
287
298
  Path(buildlog_dir),
288
299
  error_class=error_class,
289
300
  notes=notes,
301
+ select_k=select_k,
290
302
  )
291
303
  return asdict(result)
292
304
 
293
305
 
294
- def buildlog_end_session(
306
+ def buildlog_experiment_end(
295
307
  entry_file: str | None = None,
296
308
  notes: str | None = None,
297
309
  buildlog_dir: str = "buildlog",
@@ -358,7 +370,7 @@ def buildlog_log_mistake(
358
370
  return asdict(result)
359
371
 
360
372
 
361
- def buildlog_session_metrics(
373
+ def buildlog_experiment_metrics(
362
374
  session_id: str | None = None,
363
375
  buildlog_dir: str = "buildlog",
364
376
  ) -> dict:
@@ -407,6 +419,42 @@ def buildlog_experiment_report(
407
419
  return get_experiment_report(Path(buildlog_dir))
408
420
 
409
421
 
422
+ def buildlog_bandit_status(
423
+ buildlog_dir: str = "buildlog",
424
+ context: str | None = None,
425
+ top_k: int = 10,
426
+ ) -> dict:
427
+ """Get Thompson Sampling bandit status and rule rankings.
428
+
429
+ Shows the bandit's learned beliefs about which rules are effective
430
+ for each error class context. Higher mean = bandit believes rule
431
+ is more effective.
432
+
433
+ The bandit uses Beta distributions to model uncertainty:
434
+ - High variance (wide CI) = uncertain, will explore more
435
+ - Low variance (narrow CI) = confident, will exploit
436
+
437
+ Args:
438
+ buildlog_dir: Path to buildlog directory
439
+ context: Specific error class to filter by (optional)
440
+ top_k: Number of top rules to show per context
441
+
442
+ Returns:
443
+ Dict with:
444
+ - summary: Total contexts, arms, observations
445
+ - top_rules: Best rules per context by expected value
446
+ - all_rules: Full stats if filtering by context
447
+
448
+ Example:
449
+ # See all bandit state
450
+ buildlog_bandit_status()
451
+
452
+ # See state for specific error class
453
+ buildlog_bandit_status(context="type-errors")
454
+ """
455
+ return get_bandit_status(Path(buildlog_dir), context, top_k)
456
+
457
+
410
458
  # -----------------------------------------------------------------------------
411
459
  # Gauntlet Loop MCP Tools
412
460
  # -----------------------------------------------------------------------------
@@ -7,8 +7,12 @@ from typing import TYPE_CHECKING, Literal
7
7
 
8
8
  from buildlog.render.base import RenderTarget
9
9
  from buildlog.render.claude_md import ClaudeMdRenderer
10
+ from buildlog.render.continue_dev import ContinueRenderer
11
+ from buildlog.render.copilot import CopilotRenderer
12
+ from buildlog.render.cursor import CursorRenderer
10
13
  from buildlog.render.settings_json import SettingsJsonRenderer
11
14
  from buildlog.render.skill import SkillRenderer
15
+ from buildlog.render.windsurf import WindsurfRenderer
12
16
 
13
17
  if TYPE_CHECKING:
14
18
  from typing import Any
@@ -18,8 +22,13 @@ __all__ = [
18
22
  "ClaudeMdRenderer",
19
23
  "SettingsJsonRenderer",
20
24
  "SkillRenderer",
25
+ "CursorRenderer",
26
+ "CopilotRenderer",
27
+ "WindsurfRenderer",
28
+ "ContinueRenderer",
21
29
  "get_renderer",
22
30
  "RENDERERS",
31
+ "RENDER_TARGETS",
23
32
  ]
24
33
 
25
34
  # Registry of available renderers
@@ -28,18 +37,26 @@ RENDERERS: dict[str, type[RenderTarget]] = {
28
37
  "claude_md": ClaudeMdRenderer,
29
38
  "settings_json": SettingsJsonRenderer,
30
39
  "skill": SkillRenderer,
40
+ "cursor": CursorRenderer,
41
+ "copilot": CopilotRenderer,
42
+ "windsurf": WindsurfRenderer,
43
+ "continue_dev": ContinueRenderer,
31
44
  }
32
45
 
46
+ # Valid target names (useful for CLI choices and type hints)
47
+ RENDER_TARGETS = list(RENDERERS.keys())
48
+
33
49
 
34
50
  def get_renderer(
35
- target: Literal["claude_md", "settings_json", "skill"],
51
+ target: str,
36
52
  path: Path | None = None,
37
53
  **kwargs: Any,
38
54
  ) -> RenderTarget:
39
55
  """Get renderer for target.
40
56
 
41
57
  Args:
42
- target: Target format - "claude_md", "settings_json", or "skill".
58
+ target: Target format - one of: claude_md, settings_json, skill,
59
+ cursor, copilot, windsurf, continue_dev.
43
60
  path: Optional custom path for the target file.
44
61
  **kwargs: Additional arguments passed to the renderer constructor.
45
62
  Common kwargs (accepted by all renderers):