buildlog 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. buildlog/__init__.py +1 -1
  2. buildlog/cli.py +589 -44
  3. buildlog/confidence.py +27 -0
  4. buildlog/core/__init__.py +12 -0
  5. buildlog/core/bandit.py +699 -0
  6. buildlog/core/operations.py +499 -11
  7. buildlog/distill.py +80 -1
  8. buildlog/engine/__init__.py +61 -0
  9. buildlog/engine/bandit.py +23 -0
  10. buildlog/engine/confidence.py +28 -0
  11. buildlog/engine/embeddings.py +28 -0
  12. buildlog/engine/experiments.py +619 -0
  13. buildlog/engine/types.py +31 -0
  14. buildlog/llm.py +461 -0
  15. buildlog/mcp/server.py +12 -6
  16. buildlog/mcp/tools.py +166 -13
  17. buildlog/render/__init__.py +19 -2
  18. buildlog/render/claude_md.py +74 -26
  19. buildlog/render/continue_dev.py +102 -0
  20. buildlog/render/copilot.py +100 -0
  21. buildlog/render/cursor.py +105 -0
  22. buildlog/render/tracking.py +20 -1
  23. buildlog/render/windsurf.py +95 -0
  24. buildlog/seeds.py +41 -0
  25. buildlog/skills.py +69 -6
  26. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/copier.yml +0 -4
  27. buildlog-0.8.0.data/data/share/buildlog/template/buildlog/_TEMPLATE_QUICK.md +21 -0
  28. buildlog-0.8.0.dist-info/METADATA +151 -0
  29. buildlog-0.8.0.dist-info/RECORD +54 -0
  30. buildlog-0.6.1.dist-info/METADATA +0 -490
  31. buildlog-0.6.1.dist-info/RECORD +0 -41
  32. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/post_gen.py +0 -0
  33. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/.gitkeep +0 -0
  34. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/2026-01-01-example.md +0 -0
  35. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +0 -0
  36. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/_TEMPLATE.md +0 -0
  37. {buildlog-0.6.1.data → buildlog-0.8.0.data}/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
  38. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/WHEEL +0 -0
  39. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/entry_points.txt +0 -0
  40. {buildlog-0.6.1.dist-info → buildlog-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,619 @@
1
+ """Agent-agnostic experiment tracking engine.
2
+
3
+ This module contains the core session tracking, mistake logging, and reward
4
+ signal logic decoupled from any specific agent or skill generation mechanism.
5
+
6
+ The key difference from core/operations.py: functions here accept
7
+ `available_rules: list[str]` as a parameter rather than calling
8
+ `generate_skills()` internally. The caller (CLI, MCP, etc.) is responsible
9
+ for getting the rule list however it wants. The engine doesn't care where
10
+ rules come from.
11
+
12
+ Usage:
13
+ from buildlog.engine.experiments import start_session, end_session, log_mistake
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import json
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+ from typing import Literal
23
+
24
+ from buildlog.core.bandit import ThompsonSamplingBandit
25
+ from buildlog.core.operations import (
26
+ EndSessionResult,
27
+ LogMistakeResult,
28
+ LogRewardResult,
29
+ Mistake,
30
+ RewardEvent,
31
+ RewardSummary,
32
+ Session,
33
+ SessionMetrics,
34
+ StartSessionResult,
35
+ )
36
+
37
+ __all__ = [
38
+ "start_session",
39
+ "end_session",
40
+ "log_mistake",
41
+ "log_reward",
42
+ "get_rewards",
43
+ "session_metrics",
44
+ "experiment_report",
45
+ ]
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Path helpers (duplicated from operations to avoid tight coupling)
50
+ # ---------------------------------------------------------------------------
51
+
52
+
53
def _get_sessions_path(buildlog_dir: Path) -> Path:
    """Return the location of the sessions log inside ``buildlog_dir``."""
    return buildlog_dir.joinpath(".buildlog", "sessions.jsonl")
55
+
56
+
57
def _get_mistakes_path(buildlog_dir: Path) -> Path:
    """Return the location of the mistakes log inside ``buildlog_dir``."""
    return buildlog_dir.joinpath(".buildlog", "mistakes.jsonl")
59
+
60
+
61
def _get_active_session_path(buildlog_dir: Path) -> Path:
    """Return the location of the active-session marker inside ``buildlog_dir``."""
    return buildlog_dir.joinpath(".buildlog", "active_session.json")
63
+
64
+
65
def _get_rewards_path(buildlog_dir: Path) -> Path:
    """Return the location of the reward-events log inside ``buildlog_dir``."""
    return buildlog_dir.joinpath(".buildlog", "reward_events.jsonl")
67
+
68
+
69
def _get_promoted_path(buildlog_dir: Path) -> Path:
    """Return the location of the promoted-rules file inside ``buildlog_dir``."""
    return buildlog_dir.joinpath(".buildlog", "promoted.json")
71
+
72
+
73
def _load_json_set(path: Path, key: str) -> set[str]:
    """Best-effort read of a list of strings stored under ``key`` in a JSON file.

    Args:
        path: JSON file to read.
        key: Top-level key whose value is expected to be a list of strings.

    Returns:
        The string entries found under ``key`` as a set. An empty set is
        returned when the file is missing, unreadable, not valid JSON, not a
        JSON object, or when the value under ``key`` is not a list.
        Non-string entries inside the list are ignored.
    """
    if not path.exists():
        return set()
    try:
        data = json.loads(path.read_text())
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return set()
    # A syntactically valid file can still have the wrong shape (e.g. a
    # top-level list or a null value); treat that like a missing file
    # instead of raising AttributeError/TypeError.
    if not isinstance(data, dict):
        return set()
    values = data.get(key, [])
    if not isinstance(values, list):
        return set()
    return {item for item in values if isinstance(item, str)}
81
+
82
+
83
def _get_current_rules(buildlog_dir: Path) -> list[str]:
    """Return the rule IDs listed under ``skill_ids`` in the promoted-rules file.

    Args:
        buildlog_dir: Path to buildlog directory.

    Returns:
        The IDs in sorted order. The IDs are loaded as a set, whose iteration
        order is arbitrary and can differ between processes, so sorting keeps
        the recorded rule lists stable from run to run. An empty list is
        returned when the loader yields nothing.
    """
    promoted_path = _get_promoted_path(buildlog_dir)
    # key=str keeps the sort total even if the loader yields non-string IDs.
    return sorted(_load_json_set(promoted_path, "skill_ids"), key=str)
86
+
87
+
88
def _generate_session_id(now: datetime) -> str:
    """Build a session ID of the form ``session-YYYYMMDD-HHMMSS-ffffff`` from ``now``."""
    stamp = now.strftime("%Y%m%d-%H%M%S")
    micros = "{:06d}".format(now.microsecond)
    return "-".join(("session", stamp, micros))
90
+
91
+
92
def _generate_mistake_id(error_class: str, now: datetime) -> str:
    """Build a mistake ID from the first 10 characters of ``error_class`` and ``now``.

    The result has the form ``mistake-<prefix>-YYYYMMDD-HHMMSS-ffffff``.
    """
    prefix = error_class[:10]
    stamp = now.strftime("%Y%m%d-%H%M%S")
    return "mistake-{}-{}-{:06d}".format(prefix, stamp, now.microsecond)
94
+
95
+
96
def _compute_semantic_hash(description: str) -> str:
    """Return a 16-hex-character fingerprint of ``description``.

    The text is lowercased and runs of whitespace are collapsed to single
    spaces before hashing, so differences in case and spacing do not change
    the fingerprint.
    """
    tokens = description.lower().split()
    digest = hashlib.sha256(" ".join(tokens).encode())
    return digest.hexdigest()[:16]
99
+
100
+
101
def _generate_reward_id(outcome: str, timestamp: datetime) -> str:
    """Build a reward ID of the form ``rew-<10 hex chars>``.

    The hex part is the start of the SHA-256 digest of
    ``"<outcome>:<timestamp ISO string>"``.
    """
    payload = ":".join((outcome, timestamp.isoformat())).encode("utf-8")
    return "rew-" + hashlib.sha256(payload).hexdigest()[:10]
106
+
107
+
108
def _compute_reward_value(
    outcome: Literal["accepted", "revision", "rejected"],
    revision_distance: float | None,
) -> float:
    """Map a feedback outcome to a reward in the range 0.0 to 1.0.

    Args:
        outcome: ``"accepted"`` yields 1.0 and ``"rejected"`` yields 0.0. Any
            other value is scored from ``revision_distance``.
        revision_distance: Amount of correction needed; treated as 0.5 when
            None.

    Returns:
        ``1.0 - revision_distance`` clamped to [0.0, 1.0] for the revision
        case, otherwise the fixed value for the outcome.
    """
    if outcome == "accepted":
        return 1.0
    if outcome == "rejected":
        return 0.0
    distance = 0.5 if revision_distance is None else revision_distance
    capped = min(1.0, 1.0 - distance)
    return max(0.0, capped)
119
+
120
+
121
def _load_sessions(buildlog_dir: Path) -> list[Session]:
    """Read every parseable session record from the sessions log.

    Args:
        buildlog_dir: Path to buildlog directory.

    Returns:
        Sessions in file order. Returns an empty list when the log does not
        exist. Lines that raise ``json.JSONDecodeError`` or ``KeyError``
        while being parsed are skipped.
    """
    log_file = _get_sessions_path(buildlog_dir)
    if not log_file.exists():
        return []
    loaded = []
    for raw in log_file.read_text().strip().split("\n"):
        if not raw:
            continue
        try:
            loaded.append(Session.from_dict(json.loads(raw)))
        except (json.JSONDecodeError, KeyError):
            # Skip the unreadable record and keep the rest of the log.
            pass
    return loaded
134
+
135
+
136
def _load_mistakes(buildlog_dir: Path) -> list[Mistake]:
    """Read every parseable mistake record from the mistakes log.

    Args:
        buildlog_dir: Path to buildlog directory.

    Returns:
        Mistakes in file order. Returns an empty list when the log does not
        exist. Lines that raise ``json.JSONDecodeError`` or ``KeyError``
        while being parsed are skipped.
    """
    log_file = _get_mistakes_path(buildlog_dir)
    if not log_file.exists():
        return []
    loaded = []
    for raw in log_file.read_text().strip().split("\n"):
        if not raw:
            continue
        try:
            loaded.append(Mistake.from_dict(json.loads(raw)))
        except (json.JSONDecodeError, KeyError):
            # Skip the unreadable record and keep the rest of the log.
            pass
    return loaded
149
+
150
+
151
def _find_similar_prior_mistake(
    description: str,
    error_class: str,
    current_session_id: str,
    all_mistakes: list[Mistake],
) -> Mistake | None:
    """Find an earlier mistake from another session that resembles this one.

    Only mistakes with the same ``error_class`` and a different session ID
    are considered. A candidate matches when its semantic hash equals the
    hash of ``description``, or when more than 70% of the distinct
    lowercased words in ``description`` also occur in the candidate's
    description.

    Args:
        description: Description of the new mistake.
        error_class: Category of the new mistake.
        current_session_id: ID of the session the new mistake belongs to.
        all_mistakes: Previously logged mistakes to search, in order.

    Returns:
        The first matching mistake in ``all_mistakes`` order, or None.
    """
    semantic_hash = _compute_semantic_hash(description)
    # These depend only on the new description, so compute them once rather
    # than once per candidate.
    desc_words = set(description.lower().split())
    word_count = max(len(desc_words), 1)  # avoids dividing by zero

    for mistake in all_mistakes:
        if mistake.session_id == current_session_id:
            continue
        if mistake.error_class != error_class:
            continue
        if mistake.semantic_hash == semantic_hash:
            return mistake
        mistake_words = set(mistake.description.lower().split())
        if len(desc_words & mistake_words) / word_count > 0.7:
            return mistake
    return None
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # Public API — agent-agnostic experiment functions
174
+ # ---------------------------------------------------------------------------
175
+
176
+
177
def start_session(
    buildlog_dir: Path,
    error_class: str | None = None,
    notes: str | None = None,
    select_k: int = 3,
    available_rules: list[str] | None = None,
    seed_rule_ids: set[str] | None = None,
) -> StartSessionResult:
    """Open a new experiment session and pick rules for it with the bandit.

    The candidate rules come from ``available_rules`` when it is given;
    otherwise the promoted rule IDs are read from .buildlog/promoted.json.
    The new session is written to the active-session file.

    Args:
        buildlog_dir: Path to buildlog directory.
        error_class: Error class being targeted; used as the bandit context
            ("general" when not given).
        notes: Optional notes about the session.
        select_k: Upper bound on the number of rules to select.
        available_rules: Explicit list of candidate rule IDs. If None,
            promoted IDs are read from .buildlog/promoted.json.
        seed_rule_ids: Rule IDs forwarded to the bandit's ``select`` call
            (an empty set when not given).

    Returns:
        StartSessionResult with session ID, rules count, and selected rules.
    """
    started = datetime.now(timezone.utc)
    new_id = _generate_session_id(started)

    if available_rules is None:
        candidates = _get_current_rules(buildlog_dir)
    else:
        candidates = available_rules

    chosen: list[str] = []
    if candidates:
        # With no candidates the bandit is not consulted at all.
        sampler = ThompsonSamplingBandit(buildlog_dir / "bandit_state.jsonl")
        chosen = sampler.select(
            candidates=candidates,
            context=error_class or "general",
            k=min(select_k, len(candidates)),
            seed_rule_ids=seed_rule_ids or set(),
        )

    record = Session(
        id=new_id,
        started_at=started,
        rules_at_start=candidates,
        selected_rules=chosen,
        error_class=error_class,
        notes=notes,
    )

    marker = _get_active_session_path(buildlog_dir)
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.write_text(json.dumps(record.to_dict(), indent=2))

    summary = (
        f"Started session {new_id}: selected {len(chosen)}/"
        f"{len(candidates)} rules via Thompson Sampling"
    )
    return StartSessionResult(
        session_id=new_id,
        error_class=error_class,
        rules_count=len(candidates),
        selected_rules=chosen,
        message=summary,
    )
249
+
250
+
251
def end_session(
    buildlog_dir: Path,
    entry_file: str | None = None,
    notes: str | None = None,
) -> EndSessionResult:
    """Close the active experiment session and archive it.

    The session is stamped with its end time and the current promoted rules,
    appended to the sessions log, and the active-session file is removed.

    Args:
        buildlog_dir: Path to buildlog directory.
        entry_file: Corresponding buildlog entry file, if any.
        notes: Additional notes, appended to the session's notes on a new line.

    Returns:
        EndSessionResult with duration, mistake counts, and rule counts.

    Raises:
        ValueError: If there is no active session file.
    """
    marker = _get_active_session_path(buildlog_dir)
    if not marker.exists():
        raise ValueError("No active session to end")

    session = Session.from_dict(json.loads(marker.read_text()))

    session.ended_at = datetime.now(timezone.utc)
    session.rules_at_end = _get_current_rules(buildlog_dir)
    if entry_file:
        session.entry_file = entry_file
    if notes:
        session.notes = f"{session.notes or ''}\n{notes}".strip()

    archive = _get_sessions_path(buildlog_dir)
    archive.parent.mkdir(parents=True, exist_ok=True)
    with open(archive, "a") as f:
        f.write(json.dumps(session.to_dict()) + "\n")

    # The marker is removed only after the session has been archived.
    marker.unlink()

    own_mistakes = [
        m for m in _load_mistakes(buildlog_dir) if m.session_id == session.id
    ]
    mistake_count = len(own_mistakes)
    repeated = len([m for m in own_mistakes if m.was_repeat])

    elapsed = session.ended_at - session.started_at
    duration = elapsed.total_seconds() / 60

    return EndSessionResult(
        session_id=session.id,
        duration_minutes=round(duration, 1),
        mistakes_logged=mistake_count,
        repeated_mistakes=repeated,
        rules_at_start=len(session.rules_at_start),
        rules_at_end=len(session.rules_at_end),
        message=f"Ended session {session.id} ({duration:.1f}min, {mistake_count} mistakes, {repeated} repeats)",
    )
304
+
305
+
306
def log_mistake(
    buildlog_dir: Path,
    error_class: str,
    description: str,
    corrected_by_rule: str | None = None,
) -> LogMistakeResult:
    """Record a mistake against the active experiment session.

    The mistake is appended to the mistakes log. If the active session has
    selected rules, the bandit is updated with reward=0 for each of them.

    Args:
        buildlog_dir: Path to buildlog directory.
        error_class: Category of error.
        description: Description of the mistake.
        corrected_by_rule: Rule ID that should have prevented this.

    Returns:
        LogMistakeResult indicating whether a similar mistake was found in
        an earlier session.

    Raises:
        ValueError: If there is no active session file.
    """
    marker = _get_active_session_path(buildlog_dir)
    if not marker.exists():
        raise ValueError(
            "No active session - start one with 'buildlog experiment start'"
        )

    active = json.loads(marker.read_text())
    session_id = active["id"]

    logged_at = datetime.now(timezone.utc)
    mistake_id = _generate_mistake_id(error_class, logged_at)

    # Look for a repeat before this mistake is added to the log.
    prior = _find_similar_prior_mistake(
        description, error_class, session_id, _load_mistakes(buildlog_dir)
    )
    is_repeat = prior is not None

    record = Mistake(
        id=mistake_id,
        session_id=session_id,
        timestamp=logged_at,
        error_class=error_class,
        description=description,
        semantic_hash=_compute_semantic_hash(description),
        was_repeat=is_repeat,
        corrected_by_rule=corrected_by_rule,
    )

    journal = _get_mistakes_path(buildlog_dir)
    journal.parent.mkdir(parents=True, exist_ok=True)
    with open(journal, "a") as f:
        f.write(json.dumps(record.to_dict()) + "\n")

    selected_rules = active.get("selected_rules", [])
    if selected_rules:
        sampler = ThompsonSamplingBandit(buildlog_dir / "bandit_state.jsonl")
        sampler.batch_update(
            rule_ids=selected_rules,
            reward=0.0,
            context=active.get("error_class") or "general",
        )

    parts = [f"Logged mistake: {error_class}"]
    if prior:
        parts.append(f" (REPEAT of {prior.id})")
    if selected_rules:
        parts.append(f" | Updated bandit: {len(selected_rules)} rules got reward=0")

    return LogMistakeResult(
        mistake_id=mistake_id,
        session_id=session_id,
        was_repeat=is_repeat,
        similar_prior=prior.id if prior else None,
        message="".join(parts),
    )
383
+
384
+
385
def log_reward(
    buildlog_dir: Path,
    outcome: Literal["accepted", "revision", "rejected"],
    rules_active: list[str] | None = None,
    revision_distance: float | None = None,
    error_class: str | None = None,
    notes: str | None = None,
    source: str | None = None,
) -> LogRewardResult:
    """Record a feedback event and feed its reward to the bandit.

    When an active session exists, it supplies ``rules_active`` and
    ``error_class`` for whichever of the two was passed as None. The event
    is appended to the reward log, and the bandit is updated for every
    active rule.

    Args:
        buildlog_dir: Path to buildlog directory.
        outcome: Type of feedback (accepted/revision/rejected).
        rules_active: List of rule IDs in context. If None, the active
            session's selected rules are used when a session exists.
        revision_distance: How much correction was needed (0-1).
        error_class: Category of error if applicable; also the bandit
            context ("general" when unset).
        notes: Optional notes.
        source: Where this feedback came from ("manual" when not given).

    Returns:
        LogRewardResult with the reward ID, reward value, and the number of
        non-empty lines in the reward log after this event.
    """
    stamp = datetime.now(timezone.utc)
    reward_id = _generate_reward_id(outcome, stamp)
    reward_value = _compute_reward_value(outcome, revision_distance)

    marker = _get_active_session_path(buildlog_dir)
    if marker.exists():
        active = json.loads(marker.read_text())
        # Only fill in what the caller left unspecified.
        if rules_active is None:
            rules_active = active.get("selected_rules", [])
        if error_class is None:
            error_class = active.get("error_class")

    event = RewardEvent(
        id=reward_id,
        timestamp=stamp,
        outcome=outcome,
        reward_value=reward_value,
        rules_active=rules_active or [],
        revision_distance=revision_distance,
        error_class=error_class,
        notes=notes,
        source=source or "manual",
    )

    rewards_path = _get_rewards_path(buildlog_dir)
    rewards_path.parent.mkdir(parents=True, exist_ok=True)
    with open(rewards_path, "a") as f:
        f.write(json.dumps(event.to_dict()) + "\n")

    if rules_active:
        sampler = ThompsonSamplingBandit(buildlog_dir / "bandit_state.jsonl")
        sampler.batch_update(
            rule_ids=rules_active,
            reward=reward_value,
            context=error_class or "general",
        )

    total_events = 0
    if rewards_path.exists():
        logged_lines = rewards_path.read_text().strip().split("\n")
        total_events = len([entry for entry in logged_lines if entry])

    rules_count = len(rules_active) if rules_active else 0
    parts = [f"Logged {outcome} (reward={reward_value:.2f})"]
    if rules_count > 0:
        parts.append(f" | Updated bandit: {rules_count} rules")

    return LogRewardResult(
        reward_id=reward_id,
        reward_value=reward_value,
        total_events=total_events,
        message="".join(parts),
    )
463
+
464
+
465
def get_rewards(
    buildlog_dir: Path,
    limit: int | None = None,
) -> RewardSummary:
    """Load reward events and summarize them.

    The counts and the mean reward are computed over every parseable event
    in the log; ``limit`` only trims the list of events returned.

    Args:
        buildlog_dir: Path to buildlog directory.
        limit: Maximum number of events to return (most recent first).

    Returns:
        RewardSummary with events and statistics. All statistics are zero
        and the event list is empty when the reward log does not exist.
    """
    rewards_path = _get_rewards_path(buildlog_dir)

    events: list[RewardEvent] = []
    if not rewards_path.exists():
        return RewardSummary(
            total_events=0,
            accepted=0,
            revisions=0,
            rejected=0,
            mean_reward=0.0,
            events=events,
        )

    for raw in rewards_path.read_text().strip().split("\n"):
        if not raw:
            continue
        try:
            events.append(RewardEvent.from_dict(json.loads(raw)))
        except (json.JSONDecodeError, KeyError):
            # Skip the unreadable record and keep the rest of the log.
            pass

    total = len(events)
    accepted = len([e for e in events if e.outcome == "accepted"])
    revisions = len([e for e in events if e.outcome == "revision"])
    rejected = len([e for e in events if e.outcome == "rejected"])
    if total > 0:
        mean_reward = sum(e.reward_value for e in events) / total
    else:
        mean_reward = 0.0

    # Newest first; the statistics above were taken before any trimming.
    events.sort(key=lambda e: e.timestamp, reverse=True)
    if limit is not None:
        events = events[:limit]

    return RewardSummary(
        total_events=total,
        accepted=accepted,
        revisions=revisions,
        rejected=rejected,
        mean_reward=mean_reward,
        events=events,
    )
517
+
518
+
519
def session_metrics(
    buildlog_dir: Path,
    session_id: str | None = None,
) -> SessionMetrics:
    """Compute mistake and rule-count metrics for one session or for all.

    Args:
        buildlog_dir: Path to buildlog directory.
        session_id: Specific session ID, or None for aggregate metrics.

    Returns:
        SessionMetrics with mistake rates and rule changes. For aggregate
        metrics the session ID is "aggregate", the starting rules come from
        the first logged session and the ending rules from the last.

    Raises:
        ValueError: If ``session_id`` is given but no such session is logged.
    """
    sessions = _load_sessions(buildlog_dir)
    mistakes = _load_mistakes(buildlog_dir)

    if not session_id:
        # Aggregate view over every logged mistake.
        total = len(mistakes)
        repeated = len([m for m in mistakes if m.was_repeat])
        rules_start = sessions[0].rules_at_start if sessions else []
        rules_end = sessions[-1].rules_at_end if sessions else []
        return SessionMetrics(
            session_id="aggregate",
            total_mistakes=total,
            repeated_mistakes=repeated,
            repeated_mistake_rate=repeated / total if total > 0 else 0.0,
            rules_at_start=len(rules_start),
            rules_at_end=len(rules_end),
            rules_added=len(rules_end) - len(rules_start),
        )

    session = None
    for candidate in sessions:
        if candidate.id == session_id:
            session = candidate
            break
    if not session:
        raise ValueError(f"Session not found: {session_id}")

    own_mistakes = [m for m in mistakes if m.session_id == session_id]
    total = len(own_mistakes)
    repeated = len([m for m in own_mistakes if m.was_repeat])
    start_count = len(session.rules_at_start)
    end_count = len(session.rules_at_end)

    return SessionMetrics(
        session_id=session_id,
        total_mistakes=total,
        repeated_mistakes=repeated,
        repeated_mistake_rate=repeated / total if total > 0 else 0.0,
        rules_at_start=start_count,
        rules_at_end=end_count,
        rules_added=end_count - start_count,
    )
569
+
570
+
571
def experiment_report(buildlog_dir: Path) -> dict:
    """Build a report covering all logged sessions and mistakes.

    Args:
        buildlog_dir: Path to buildlog directory.

    Returns:
        Dictionary with three keys: ``summary`` (overall totals and repeat
        rate), ``sessions`` (one metrics dict per logged session), and
        ``error_classes`` (total and repeated counts per error class).
    """
    sessions = _load_sessions(buildlog_dir)
    mistakes = _load_mistakes(buildlog_dir)

    def _row(session) -> dict:
        # Metrics for a single session, counted from its own mistakes.
        own = [m for m in mistakes if m.session_id == session.id]
        count = len(own)
        repeats = len([m for m in own if m.was_repeat])
        return {
            "session_id": session.id,
            "started_at": session.started_at.isoformat(),
            "error_class": session.error_class,
            "total_mistakes": count,
            "repeated_mistakes": repeats,
            "repeated_mistake_rate": repeats / count if count > 0 else 0.0,
            "rules_added": len(session.rules_at_end) - len(session.rules_at_start),
        }

    per_session = [_row(session) for session in sessions]

    total_mistakes = len(mistakes)
    total_repeated = len([m for m in mistakes if m.was_repeat])

    error_classes: dict[str, dict] = {}
    for mistake in mistakes:
        bucket = error_classes.setdefault(
            mistake.error_class, {"total": 0, "repeated": 0}
        )
        bucket["total"] += 1
        if mistake.was_repeat:
            bucket["repeated"] += 1

    if total_mistakes > 0:
        overall_rate = total_repeated / total_mistakes
    else:
        overall_rate = 0.0

    return {
        "summary": {
            "total_sessions": len(sessions),
            "total_mistakes": total_mistakes,
            "total_repeated": total_repeated,
            "overall_repeat_rate": overall_rate,
        },
        "sessions": per_session,
        "error_classes": error_classes,
    }
@@ -0,0 +1,31 @@
1
+ """Pure data types for the buildlog engine.
2
+
3
+ Re-exports dataclasses from their canonical locations. These are pure data
4
+ structures with no I/O dependencies, suitable for use in any context.
5
+
6
+ Usage:
7
+ from buildlog.engine.types import Skill, Session, Mistake, RewardEvent
8
+ """
9
+
10
+ from buildlog.confidence import ConfidenceConfig, ConfidenceMetrics
11
+ from buildlog.core.bandit import BetaParams
12
+ from buildlog.core.operations import (
13
+ Mistake,
14
+ RewardEvent,
15
+ RewardSummary,
16
+ Session,
17
+ SessionMetrics,
18
+ )
19
+ from buildlog.skills import Skill
20
+
21
+ __all__ = [
22
+ "Skill",
23
+ "Session",
24
+ "SessionMetrics",
25
+ "Mistake",
26
+ "RewardEvent",
27
+ "RewardSummary",
28
+ "BetaParams",
29
+ "ConfidenceMetrics",
30
+ "ConfidenceConfig",
31
+ ]