buildlog 0.6.1-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +589 -44
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +12 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +499 -11
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +461 -0
  15. buildlog/mcp/server.py +12 -6
  16. buildlog/mcp/tools.py +166 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +74 -26
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/tracking.py +20 -1
  23. buildlog/render/windsurf.py +95 -0
  24. buildlog/seeds.py +41 -0
  25. buildlog/skills.py +69 -6
  26. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/copier.yml +0 -4
  27. buildlog-0.8.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  28. buildlog-0.8.0.dist-info/METADATA +151 -0
  29. buildlog-0.8.0.dist-info/RECORD +54 -0
  30. buildlog-0.6.1.dist-info/METADATA +0 -490
  31. buildlog-0.6.1.dist-info/RECORD +0 -41
  32. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/post_gen.py +0 -0
  33. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  34. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  35. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  36. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  37. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  38. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/WHEEL +0 -0
  39. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/entry_points.txt +0 -0
  40. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/licenses/LICENSE +0 -0
buildlog/llm.py ADDED
@@ -0,0 +1,461 @@
+ """LLM-backed rule extraction, deduplication, and scoring.
+
+ Provides a provider-agnostic interface for using LLMs to:
+ - Extract structured rules from buildlog entries
+ - Select canonical forms when deduplicating similar rules
+ - Score rules with severity/scope/applicability
+
+ Provider cascade:
+ 1. Explicit config (.buildlog/config.yml or env)
+ 2. Injected at call site (API parameter)
+ 3. Auto-detect: Ollama -> Anthropic -> None (regex fallback)
+ """
+
+ from __future__ import annotations
+
+ __all__ = [
+     "ExtractedRule",
+     "RuleScoring",
+     "LLMConfig",
+     "LLMBackend",
+     "OllamaBackend",
+     "AnthropicBackend",
+     "PROVIDERS",
+     "register_provider",
+     "get_llm_backend",
+ ]
+
+ import json
+ import logging
+ import os
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Protocol, runtime_checkable
+
+ logger = logging.getLogger(__name__)
+
+ # --- Data types (provider-agnostic) ---
+
+ VALID_SEVERITIES = ("critical", "major", "minor", "info")
+ VALID_SCOPES = ("global", "module", "function")
+ VALID_CATEGORIES = ("architectural", "workflow", "tool_usage", "domain_knowledge")
+
+
+ @dataclass
+ class ExtractedRule:
+     """A rule extracted from buildlog text by an LLM."""
+
+     rule: str
+     category: str  # architectural/workflow/tool_usage/domain_knowledge
+     severity: str = "info"  # critical/major/minor/info
+     scope: str = "global"  # global/module/function
+     applicability: list[str] = field(default_factory=list)
+     context: str | None = None  # when to apply
+     antipattern: str | None = None  # what violation looks like
+     rationale: str | None = None  # why it matters
+
+     def __post_init__(self) -> None:
+         if self.severity not in VALID_SEVERITIES:
+             self.severity = "info"
+         if self.scope not in VALID_SCOPES:
+             self.scope = "global"
+         if self.category not in VALID_CATEGORIES:
+             self.category = "architectural"
+
+
+ @dataclass
+ class RuleScoring:
+     """Severity/scope/applicability scoring for a rule."""
+
+     severity: str = "info"
+     scope: str = "global"
+     applicability: list[str] = field(default_factory=list)
+
+
+ # --- Provider config ---
+
+
+ @dataclass
+ class LLMConfig:
+     """Configuration for an LLM provider."""
+
+     provider: str  # "ollama", "anthropic", "openai", ...
+     model: str | None = None  # None = auto-detect or provider default
+     base_url: str | None = None  # Override endpoint
+     api_key: str | None = None  # From config or env var
+
+     @classmethod
+     def from_buildlog_config(cls, buildlog_dir: Path) -> LLMConfig | None:
+         """Read from .buildlog/config.yml [llm] section."""
+         config_path = buildlog_dir / ".buildlog" / "config.yml"
+         if not config_path.exists():
+             return None
+
+         try:
+             import yaml
+         except ImportError:
+             logger.debug("PyYAML not available, skipping config file")
+             return None
+
+         try:
+             data = yaml.safe_load(config_path.read_text())
+         except Exception:
+             logger.warning("Failed to parse %s", config_path)
+             return None
+
+         if not isinstance(data, dict):
+             return None
+
+         llm_config = data.get("llm")
+         if not isinstance(llm_config, dict):
+             return None
+
+         provider = llm_config.get("provider")
+         if not provider:
+             return None
+
+         return cls(
+             provider=str(provider),
+             model=llm_config.get("model"),
+             base_url=llm_config.get("base_url"),
+             api_key=llm_config.get("api_key"),
+         )
+
+     @classmethod
+     def auto_detect(cls) -> LLMConfig | None:
+         """Ollama running? -> use it. ANTHROPIC_API_KEY? -> use that. Else None."""
+         # Try Ollama first (local, no API key needed)
+         if _is_ollama_available():
+             return cls(provider="ollama")
+
+         # Try Anthropic (cloud)
+         api_key = os.environ.get("ANTHROPIC_API_KEY")
+         if api_key:
+             return cls(provider="anthropic", api_key=api_key)
+
+         return None
+
+
+ def _is_ollama_available() -> bool:
+     """Check if Ollama is running and accessible."""
+     try:
+         import ollama as ollama_lib
+
+         ollama_lib.list()
+         return True
+     except Exception:
+         return False
+
+
+ # --- Interface ---
+
+
+ @runtime_checkable
+ class LLMBackend(Protocol):
+     """Protocol for LLM backends."""
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         ...
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         ...
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         ...
+
+
+ # --- Prompts ---
+
+ EXTRACT_RULES_PROMPT = """\
+ You are analyzing a buildlog entry's Improvements section. Extract actionable rules.
+
+ For each rule, return a JSON array of objects with these fields:
+ - "rule": string — the actionable rule in imperative form
+ - "category": string — one of: architectural, workflow, tool_usage, domain_knowledge
+ - "severity": string — one of: critical, major, minor, info
+ - "scope": string — one of: global, module, function
+ - "applicability": array of strings — contexts where relevant (e.g., "python", "api-design")
+ - "context": string or null — when to apply this rule
+ - "antipattern": string or null — what violation looks like
+ - "rationale": string or null — why it matters
+
+ Return ONLY a JSON array. No markdown, no explanation.
+
+ Text to analyze:
+ {text}
+ """
+
+ SELECT_CANONICAL_PROMPT = """\
+ Given these similar rules, produce the single best canonical form.
+ The canonical rule should be clear, concise, and actionable.
+
+ Similar rules:
+ {candidates}
+
+ Return ONLY the canonical rule text as a plain string. No JSON, no quotes, no explanation.
+ """
+
+ SCORE_RULE_PROMPT = """\
+ Score this rule for severity, scope, and applicability.
+
+ Rule: {rule}
+ Context: {context}
+
+ Return ONLY a JSON object with:
+ - "severity": one of: critical, major, minor, info
+ - "scope": one of: global, module, function
+ - "applicability": array of strings (contexts where relevant)
+
+ No markdown, no explanation.
+ """
+
+
+ def _parse_json_response(text: str) -> list | dict:
+     """Parse JSON from LLM response, handling markdown code blocks."""
+     text = text.strip()
+     # Strip markdown code blocks
+     if text.startswith("```"):
+         lines = text.split("\n")
+         # Remove first and last lines (``` markers)
+         lines = [ln for ln in lines[1:] if not ln.strip().startswith("```")]
+         text = "\n".join(lines)
+     return json.loads(text)
+
+
+ # --- Implementations ---
+
+
+ class OllamaBackend:
+     """LLM backend using Ollama (local)."""
+
+     def __init__(self, model: str | None = None, base_url: str | None = None):
+         self._model = model
+         self._base_url = base_url
+         self._resolved_model: str | None = None
+
+     def _get_model(self) -> str:
+         """Resolve model name, auto-detecting largest if not specified."""
+         if self._resolved_model:
+             return self._resolved_model
+
+         if self._model:
+             self._resolved_model = self._model
+             return self._resolved_model
+
+         # Auto-detect: pick largest pulled model
+         try:
+             import ollama as ollama_lib
+
+             models = ollama_lib.list()
+             if not models or not models.get("models"):
+                 raise RuntimeError(
+                     "No Ollama models found. Pull one with: ollama pull llama3.2"
+                 )
+
+             model_list = models["models"]
+             # Sort by size descending, pick largest
+             largest = max(model_list, key=lambda m: m.get("size", 0))
+             model_name: str = largest["name"]
+             self._resolved_model = model_name
+             logger.info("Auto-detected Ollama model: %s", model_name)
+             return model_name
+         except ImportError:
+             raise ImportError(
+                 "ollama package is required. Install with: pip install buildlog[ollama]"
+             )
+
+     def _chat(self, prompt: str) -> str:
+         """Send a prompt to Ollama and return the response text."""
+         import ollama as ollama_lib
+
+         kwargs = {
+             "model": self._get_model(),
+             "messages": [{"role": "user", "content": prompt}],
+         }
+         if self._base_url:
+             client = ollama_lib.Client(host=self._base_url)
+             response = client.chat(**kwargs)
+         else:
+             response = ollama_lib.chat(**kwargs)
+         return response["message"]["content"]
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if not isinstance(parsed, list):
+                 parsed = [parsed]
+             return [ExtractedRule(**item) for item in parsed]
+         except Exception as e:
+             logger.warning("Ollama extraction failed: %s", e)
+             return []
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
+         prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
+         try:
+             response = self._chat(prompt)
+             return response.strip().strip('"').strip("'")
+         except Exception as e:
+             logger.warning("Ollama canonical selection failed: %s", e)
+             return min(candidates, key=len)
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if isinstance(parsed, dict):
+                 return RuleScoring(
+                     severity=parsed.get("severity", "info"),
+                     scope=parsed.get("scope", "global"),
+                     applicability=parsed.get("applicability", []),
+                 )
+         except Exception as e:
+             logger.warning("Ollama scoring failed: %s", e)
+         return RuleScoring()
+
+
+ class AnthropicBackend:
+     """LLM backend using Anthropic Claude API."""
+
+     def __init__(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+     ):
+         self._model = model or "claude-haiku-4-20250514"
+         self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
+         self._client = None
+
+     def _get_client(self):
+         """Lazy-load the Anthropic client."""
+         if self._client is None:
+             try:
+                 import anthropic
+             except ImportError:
+                 raise ImportError(
+                     "anthropic package is required. Install with: pip install buildlog[anthropic]"
+                 )
+             if not self._api_key:
+                 raise ValueError("ANTHROPIC_API_KEY is required for Anthropic backend")
+             self._client = anthropic.Anthropic(api_key=self._api_key)
+         return self._client
+
+     def _chat(self, prompt: str) -> str:
+         """Send a prompt to Claude and return the response text."""
+         client = self._get_client()
+         response = client.messages.create(
+             model=self._model,
+             max_tokens=2048,
+             messages=[{"role": "user", "content": prompt}],
+         )
+         return response.content[0].text
+
+     def extract_rules(self, entry_text: str) -> list[ExtractedRule]:
+         """Extract structured rules from buildlog entry text."""
+         prompt = EXTRACT_RULES_PROMPT.format(text=entry_text)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if not isinstance(parsed, list):
+                 parsed = [parsed]
+             return [ExtractedRule(**item) for item in parsed]
+         except Exception as e:
+             logger.warning("Anthropic extraction failed: %s", e)
+             return []
+
+     def select_canonical(self, candidates: list[str]) -> str:
+         """Given similar rules, produce the single best canonical form."""
+         numbered = "\n".join(f"{i+1}. {c}" for i, c in enumerate(candidates))
+         prompt = SELECT_CANONICAL_PROMPT.format(candidates=numbered)
+         try:
+             response = self._chat(prompt)
+             return response.strip().strip('"').strip("'")
+         except Exception as e:
+             logger.warning("Anthropic canonical selection failed: %s", e)
+             return min(candidates, key=len)
+
+     def score_rule(self, rule: str, context: str) -> RuleScoring:
+         """Score a rule with severity/scope/applicability."""
+         prompt = SCORE_RULE_PROMPT.format(rule=rule, context=context)
+         try:
+             response = self._chat(prompt)
+             parsed = _parse_json_response(response)
+             if isinstance(parsed, dict):
+                 return RuleScoring(
+                     severity=parsed.get("severity", "info"),
+                     scope=parsed.get("scope", "global"),
+                     applicability=parsed.get("applicability", []),
+                 )
+         except Exception as e:
+             logger.warning("Anthropic scoring failed: %s", e)
+         return RuleScoring()
+
+
+ # --- Registry ---
+
+ PROVIDERS: dict[str, type] = {
+     "ollama": OllamaBackend,
+     "anthropic": AnthropicBackend,
+ }
+
+
+ def register_provider(name: str, cls: type) -> None:
+     """Register a new LLM provider backend."""
+     PROVIDERS[name] = cls
+
+
+ def get_llm_backend(
+     config: LLMConfig | None = None,
+     buildlog_dir: Path | None = None,
+ ) -> LLMBackend | None:
+     """Get an LLM backend using the provider cascade.
+
+     Resolution order:
+     1. Explicit config parameter (highest priority)
+     2. Config file (.buildlog/config.yml)
+     3. Auto-detect: Ollama -> Anthropic -> None
+
+     Returns None if no provider is available (regex fallback).
+     """
+     # 1. Explicit config
+     if config is None and buildlog_dir is not None:
+         # 2. Config file
+         config = LLMConfig.from_buildlog_config(buildlog_dir)
+
+     if config is None:
+         # 3. Auto-detect
+         config = LLMConfig.auto_detect()
+
+     if config is None:
+         logger.info("No LLM provider available, using regex fallback")
+         return None
+
+     provider_cls = PROVIDERS.get(config.provider)
+     if provider_cls is None:
+         logger.warning("Unknown LLM provider: %s", config.provider)
+         return None
+
+     try:
+         kwargs: dict = {}
+         if config.model:
+             kwargs["model"] = config.model
+         if config.provider == "ollama" and config.base_url:
+             kwargs["base_url"] = config.base_url
+         if config.provider == "anthropic" and config.api_key:
+             kwargs["api_key"] = config.api_key
+
+         backend = provider_cls(**kwargs)
+         logger.info("Using LLM provider: %s", config.provider)
+         return backend
+     except Exception as e:
+         logger.warning("Failed to initialize %s backend: %s", config.provider, e)
+         return None
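
A minimal usage sketch of the cascade above (illustrative, not taken from the package): it assumes the repository root is the current directory and uses a made-up entry text; get_llm_backend, LLMConfig, and the ExtractedRule fields are as defined in llm.py.

from pathlib import Path

from buildlog.llm import LLMConfig, get_llm_backend

# Cascade: explicit config > .buildlog/config.yml > auto-detect (Ollama, then
# Anthropic) > None, in which case callers fall back to regex extraction.
backend = get_llm_backend(buildlog_dir=Path("."))

# Pinning a provider instead of auto-detecting:
# backend = get_llm_backend(config=LLMConfig(provider="ollama", model="llama3.2"))

if backend is not None:
    entry_text = "## Improvements\n- Pin dependency versions in CI."  # hypothetical entry
    for rule in backend.extract_rules(entry_text):
        print(rule.severity, rule.category, rule.rule)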
buildlog/mcp/server.py CHANGED
@@ -6,16 +6,18 @@ from mcp.server.fastmcp import FastMCP
  
  from buildlog.mcp.tools import (
      buildlog_diff,
-     buildlog_end_session,
+     buildlog_experiment_end,
+     buildlog_experiment_metrics,
      buildlog_experiment_report,
+     buildlog_experiment_start,
+     buildlog_gauntlet_accept_risk,
+     buildlog_gauntlet_issues,
      buildlog_learn_from_review,
      buildlog_log_mistake,
      buildlog_log_reward,
      buildlog_promote,
      buildlog_reject,
      buildlog_rewards,
-     buildlog_session_metrics,
-     buildlog_start_session,
      buildlog_status,
  )
  
@@ -31,12 +33,16 @@ mcp.tool()(buildlog_log_reward)
  mcp.tool()(buildlog_rewards)
  
  # Session tracking tools (experiment infrastructure)
- mcp.tool()(buildlog_start_session)
- mcp.tool()(buildlog_end_session)
+ mcp.tool()(buildlog_experiment_start)
+ mcp.tool()(buildlog_experiment_end)
  mcp.tool()(buildlog_log_mistake)
- mcp.tool()(buildlog_session_metrics)
+ mcp.tool()(buildlog_experiment_metrics)
  mcp.tool()(buildlog_experiment_report)
  
+ # Gauntlet loop tools
+ mcp.tool()(buildlog_gauntlet_issues)
+ mcp.tool()(buildlog_gauntlet_accept_risk)
+
  
  def main() -> None:
      """Run the MCP server."""