buildlog 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
buildlog/stats.py ADDED
@@ -0,0 +1,469 @@
+ """Statistics and analytics for buildlog entries."""
+
+ from __future__ import annotations
+
+ __all__ = [
+     "BuildlogStats",
+     "calculate_stats",
+     "format_dashboard",
+     "format_json",
+ ]
+
+ import json
+ import logging
+ from dataclasses import dataclass, field
+ from datetime import UTC, date, datetime, timedelta
+ from itertools import takewhile
+ from pathlib import Path
+ from typing import Final, NamedTuple, TypedDict
+
+ from buildlog.distill import (
+     CATEGORIES,
+     extract_title_and_context,
+     iter_buildlog_entries,
+     parse_improvements,
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # Quality thresholds
+ TOP_SOURCES_LIMIT: Final[int] = 5
+ RECENT_ENTRY_THRESHOLD_DAYS: Final[int] = 7
+
+
+ # TypedDict definitions for precise return types
+ class EntryStatsDict(TypedDict):
+     """Type for entry statistics dictionary."""
+
+     total: int
+     this_week: int
+     this_month: int
+     with_improvements: int
+     coverage_percent: int
+
+
+ class InsightStatsDict(TypedDict):
+     """Type for insight statistics dictionary."""
+
+     total: int
+     by_category: dict[str, int]
+
+
+ class StreakStatsDict(TypedDict):
+     """Type for streak statistics dictionary."""
+
+     current: int
+     longest: int
+
+
+ class PipelineStatsDict(TypedDict):
+     """Type for pipeline statistics dictionary."""
+
+     last_distill: str | None
+     last_skills: str | None
+     last_export: str | None
+
+
+ class SourceDict(TypedDict):
+     """Type for source dictionary."""
+
+     name: str
+     insights: int
+
+
+ class BuildlogStatsDict(TypedDict):
+     """Type for full buildlog stats dictionary."""
+
+     generated_at: str
+     entries: EntryStatsDict
+     insights: InsightStatsDict
+     top_sources: list[SourceDict]
+     pipeline: PipelineStatsDict
+     streak: StreakStatsDict
+     warnings: list[str]
+
+
86
+ @dataclass(frozen=True, slots=True)
87
+ class EntryStats:
88
+ """Statistics about buildlog entries."""
89
+
90
+ total: int = 0
91
+ this_week: int = 0
92
+ this_month: int = 0
93
+ with_improvements: int = 0
94
+ coverage_percent: int = 0
95
+
96
+
97
+ @dataclass(frozen=True, slots=True)
98
+ class InsightStats:
99
+ """Statistics about insights/learnings.
100
+
101
+ Note: frozen=True prevents attribute reassignment but dict contents
102
+ are still mutable (Python limitation). Treat as immutable by convention.
103
+ """
104
+
105
+ total: int = 0
106
+ by_category: dict[str, int] = field(default_factory=dict)
107
+
108
+
109
+ @dataclass(frozen=True, slots=True)
110
+ class StreakStats:
111
+ """Statistics about entry streaks."""
112
+
113
+ current: int = 0
114
+ longest: int = 0
115
+
116
+
117
+ @dataclass(frozen=True, slots=True)
118
+ class PipelineStats:
119
+ """Statistics about the knowledge pipeline."""
120
+
121
+ last_distill: str | None = None
122
+ last_skills: str | None = None
123
+ last_export: str | None = None
124
+
125
+
126
+ class ParsedEntry(NamedTuple):
127
+ """An immutable parsed buildlog entry."""
128
+
129
+ path: Path
130
+ name: str
131
+ entry_date: date | None
132
+ title: str
133
+ has_improvements: bool
134
+ insights: dict[str, list[str]]
135
+
136
+ @property
137
+ def insight_count(self) -> int:
138
+ """Total number of insights in this entry."""
139
+ return sum(len(items) for items in self.insights.values())
140
+
141
+
142
+ @dataclass
143
+ class BuildlogStats:
144
+ """Complete statistics for a buildlog directory."""
145
+
146
+ generated_at: str
147
+ entries: EntryStats
148
+ insights: InsightStats
149
+ top_sources: list[SourceDict]
150
+ pipeline: PipelineStats
151
+ streak: StreakStats
152
+ warnings: list[str]
153
+
154
+
+ def parse_date_from_string(date_str: str) -> date | None:
+     """Parse a date string like '2026-01-15' into a date object."""
+     try:
+         return date.fromisoformat(date_str)
+     except ValueError:
+         return None
+
+
+ def _parse_entry(path: Path, date_str: str) -> ParsedEntry | None:
+     """Parse a buildlog entry file into an immutable structure.
+
+     Args:
+         path: Path to the entry file.
+         date_str: Date string extracted from filename (YYYY-MM-DD).
+
+     Returns:
+         ParsedEntry or None if parsing fails.
+     """
+     try:
+         content = path.read_text(encoding="utf-8")
+     except (OSError, UnicodeDecodeError) as e:
+         logger.warning("Failed to read %s: %s", path, e)
+         return None
+
+     title = extract_title_and_context(content)
+     if not title:
+         title = "(untitled)"
+
+     insights = parse_improvements(content)
+     has_improvements = any(len(items) > 0 for items in insights.values())
+
+     return ParsedEntry(
+         path=path,
+         name=path.name,
+         entry_date=parse_date_from_string(date_str),
+         title=title,
+         has_improvements=has_improvements,
+         insights=insights,
+     )
+
+
+ def calculate_streak(entry_dates: list[date]) -> tuple[int, int]:
+     """Calculate current and longest streak of consecutive days with entries.
+
+     Args:
+         entry_dates: List of dates with entries.
+
+     Returns:
+         Tuple of (current_streak, longest_streak)
+     """
+     if not entry_dates:
+         return 0, 0
+
+     unique_dates = sorted(set(entry_dates), reverse=True)
+     if not unique_dates:
+         return 0, 0
+
+     today = date.today()
+
+     # Calculate current streak using functional approach
+     current_streak = 0
+     if unique_dates[0] >= today - timedelta(days=1):
+         consecutive = list(
+             takewhile(
+                 lambda pair: pair[0] == 0
+                 or unique_dates[pair[0] - 1] - unique_dates[pair[0]]
+                 == timedelta(days=1),
+                 enumerate(unique_dates),
+             )
+         )
+         current_streak = len(consecutive)
+
+     # Calculate longest streak
+     sorted_dates = sorted(unique_dates)
+
+     def streak_lengths(dates: list[date]) -> list[int]:
+         """Generate lengths of consecutive date runs."""
+         if not dates:
+             return [0]
+         lengths: list[int] = []
+         current_run = 1
+         for i in range(1, len(dates)):
+             if dates[i] - dates[i - 1] == timedelta(days=1):
+                 current_run += 1
+             else:
+                 lengths.append(current_run)
+                 current_run = 1
+         lengths.append(current_run)
+         return lengths
+
+     longest_streak = max(streak_lengths(sorted_dates), default=1)
+
+     return current_streak, longest_streak
+
+
+ def _check_quality(entries: list[ParsedEntry]) -> list[str]:
+     """Generate quality warnings for entries."""
+     warnings: list[str] = []
+
+     empty_improvements = [e for e in entries if not e.has_improvements]
+     if empty_improvements:
+         warnings.append(
+             f"{len(empty_improvements)} entries have empty Improvements sections"
+         )
+
+     if entries:
+         entry_dates = [e.entry_date for e in entries if e.entry_date]
+         if entry_dates:
+             most_recent = max(entry_dates)
+             days_since = (date.today() - most_recent).days
+             if days_since > RECENT_ENTRY_THRESHOLD_DAYS:
+                 warnings.append(
+                     f"No entries in last {RECENT_ENTRY_THRESHOLD_DAYS} days "
+                     f"(last entry: {days_since} days ago)"
+                 )
+
+     return warnings
+
+
+ def _aggregate_insights(entries: list[ParsedEntry]) -> tuple[dict[str, int], int]:
+     """Aggregate insight counts from parsed entries.
+
+     Returns:
+         Tuple of (by_category dict, total count)
+     """
+     insight_totals: dict[str, int] = {cat: 0 for cat in CATEGORIES}
+
+     for entry in entries:
+         for category, items in entry.insights.items():
+             if category in insight_totals:
+                 insight_totals[category] += len(items)
+
+     total_insights = sum(insight_totals.values())
+     return insight_totals, total_insights
+
+
+ def _compute_top_sources(entries: list[ParsedEntry]) -> list[SourceDict]:
+     """Compute top sources by insight count."""
+     entries_with_insights = [
+         (e, e.insight_count) for e in entries if e.insight_count > 0
+     ]
+     entries_with_insights.sort(key=lambda x: x[1], reverse=True)
+     return [
+         SourceDict(name=e.name, insights=count)
+         for e, count in entries_with_insights[:TOP_SOURCES_LIMIT]
+     ]
+
+
+ def calculate_stats(
+     buildlog_dir: Path, since_date: date | None = None
+ ) -> BuildlogStats:
+     """Calculate all statistics for a buildlog directory.
+
+     Args:
+         buildlog_dir: Path to the buildlog directory.
+         since_date: If provided, only include entries from this date onward.
+
+     Returns:
+         BuildlogStats with aggregated statistics.
+     """
+     # Parse all entries using functional map/filter pattern
+     parsed_or_none = [
+         _parse_entry(entry_path, date_str)
+         for entry_path, date_str in iter_buildlog_entries(buildlog_dir, since=since_date)
+     ]
+     entries = [e for e in parsed_or_none if e is not None]
+
+     # Calculate date-based stats
+     today = date.today()
+     week_ago = today - timedelta(days=7)
+     month_start = today.replace(day=1)
+
+     entry_dates = [e.entry_date for e in entries if e.entry_date]
+
+     this_week = sum(1 for d in entry_dates if d and d >= week_ago)
+     this_month = sum(1 for d in entry_dates if d and d >= month_start)
+
+     with_improvements = sum(1 for e in entries if e.has_improvements)
+     coverage_percent = int((with_improvements / len(entries) * 100) if entries else 0)
+
+     # Calculate insight stats
+     insight_totals, total_insights = _aggregate_insights(entries)
+
+     # Calculate top sources
+     top_sources = _compute_top_sources(entries)
+
+     # Calculate streaks
+     current_streak, longest_streak = calculate_streak(entry_dates)
+
+     # Generate warnings
+     warnings = _check_quality(entries)
+
+     if not entries:
+         if since_date:
+             warnings.insert(0, f"No entries found since {since_date}")
+         else:
+             warnings.insert(0, "No buildlog entries found")
+
+     return BuildlogStats(
+         generated_at=datetime.now(UTC).isoformat().replace("+00:00", "Z"),
+         entries=EntryStats(
+             total=len(entries),
+             this_week=this_week,
+             this_month=this_month,
+             with_improvements=with_improvements,
+             coverage_percent=coverage_percent,
+         ),
+         insights=InsightStats(
+             total=total_insights,
+             by_category=insight_totals,
+         ),
+         top_sources=top_sources,
+         pipeline=PipelineStats(),
+         streak=StreakStats(
+             current=current_streak,
+             longest=longest_streak,
+         ),
+         warnings=warnings,
+     )
+
+
+ def format_dashboard(stats: BuildlogStats, detailed: bool = False) -> str:
+     """Format stats as a terminal dashboard.
+
+     Args:
+         stats: The BuildlogStats to format.
+         detailed: If True, include more details like top sources.
+
+     Returns:
+         Formatted string for terminal output.
+     """
+     lines: list[str] = []
+
+     lines.append("Buildlog Statistics")
+     lines.append("=" * 50)
+     lines.append("")
+
+     # Entry stats
+     e = stats.entries
+     lines.append(
+         f"Entries: {e.total} total ({e.this_week} this week, {e.this_month} this month)"
+     )
+     lines.append(f"Coverage: {e.coverage_percent}% have Improvements filled out")
+     lines.append("")
+
+     # Insights by category
+     lines.append("By Category:")
+     for category, count in stats.insights.by_category.items():
+         display_name = category.replace("_", " ").title()
+         lines.append(f" {display_name:<20} {count:>3} insights")
+
+     lines.append(" " + "-" * 26)
+     lines.append(f" {'Total':<20} {stats.insights.total:>3} insights")
+     lines.append("")
+
+     # Top sources (if detailed or there are sources)
+     if detailed and stats.top_sources:
+         lines.append("Top Sources:")
+         for idx, source in enumerate(stats.top_sources, 1):
+             lines.append(f" {idx}. {source['name']} ({source['insights']} insights)")
+         lines.append("")
+
+     # Quality warnings
+     if stats.warnings:
+         lines.append("Quality Warnings:")
+         for warning in stats.warnings:
+             lines.append(f" - {warning}")
+         lines.append("")
+
+     # Streak
+     s = stats.streak
+     lines.append(f"Streak: {s.current} days (longest: {s.longest} days)")
+
+     return "\n".join(lines)
+
+
+ def stats_to_dict(stats: BuildlogStats) -> BuildlogStatsDict:
+     """Convert BuildlogStats to a JSON-serializable dictionary."""
+     return {
+         "generated_at": stats.generated_at,
+         "entries": {
+             "total": stats.entries.total,
+             "this_week": stats.entries.this_week,
+             "this_month": stats.entries.this_month,
+             "with_improvements": stats.entries.with_improvements,
+             "coverage_percent": stats.entries.coverage_percent,
+         },
+         "insights": {
+             "total": stats.insights.total,
+             "by_category": stats.insights.by_category,
+         },
+         "top_sources": stats.top_sources,
+         "pipeline": {
+             "last_distill": stats.pipeline.last_distill,
+             "last_skills": stats.pipeline.last_skills,
+             "last_export": stats.pipeline.last_export,
+         },
+         "streak": {
+             "current": stats.streak.current,
+             "longest": stats.streak.longest,
+         },
+         "warnings": stats.warnings,
+     }
+
+
+ def format_json(stats: BuildlogStats) -> str:
+     """Format stats as JSON.
+
+     Args:
+         stats: The BuildlogStats to format.
+
+     Returns:
+         JSON string.
+     """
+     return json.dumps(stats_to_dict(stats), indent=2)
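
Taken together, the module exposes a small pipeline: calculate_stats() walks a buildlog directory, and format_dashboard() / format_json() render the result for terminals or tooling. A minimal sketch of driving it, assuming entries live in a buildlog/ directory at the project root (the package's actual CLI wiring is not shown in this diff):

    from pathlib import Path

    from buildlog.stats import calculate_stats, format_dashboard, format_json

    # Assumed layout: buildlog entries in ./buildlog, e.g. 2026-01-15-some-slug.md
    stats = calculate_stats(Path("buildlog"))
    print(format_dashboard(stats, detailed=True))  # human-readable dashboard
    print(format_json(stats))                      # machine-readable JSON
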
@@ -0,0 +1,35 @@
+ # buildlog-template: Engineering notebook for AI-assisted development
+ # https://github.com/peleke/buildlog-template
+
+ _min_copier_version: "9.0.0"
+ _subdirectory: template
+
+ # Questions
+ project_name:
+   type: str
+   help: Project name (used in templates)
+   default: "{{ _copier_conf.dst_path | basename }}"
+
+ include_ai_reflection:
+   type: bool
+   help: Include AI Experience Reflection section in template?
+   default: true
+
+ update_claude_md:
+   type: bool
+   help: Add buildlog instructions to CLAUDE.md if it exists?
+   default: true
+
+ # Post-generation tasks
+ _tasks:
+   - "{{ 'python3 post_gen.py' if update_claude_md else 'echo Skipping CLAUDE.md update' }}"
+
+ _message_after_copy: |
+   Build journal installed!
+
+   Usage:
+   1. Copy template: cp buildlog/_TEMPLATE.md buildlog/$(date +%Y-%m-%d)-your-slug.md
+   2. Fill in sections as you work
+   3. Quality bar: Publishable as a $500+ tutorial
+
+   Update template later: copier update
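
The questions above become Copier answers when a project is generated from the template. A sketch using Copier's Python API (run_copy is available in Copier 9, which _min_copier_version requires; the destination path and answer values are illustrative):

    from copier import run_copy

    run_copy(
        "gh:peleke/buildlog-template",  # template source from the header comment
        "my-project",                   # destination directory (illustrative)
        data={
            "project_name": "my-project",
            "include_ai_reflection": True,
            "update_claude_md": True,  # True runs the python3 post_gen.py task
        },
    )

The equivalent CLI invocation is "copier copy gh:peleke/buildlog-template my-project", which prompts for the same answers interactively.
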
@@ -0,0 +1,50 @@
+ #!/usr/bin/env python3
+ """Post-generation script to update CLAUDE.md with buildlog instructions."""
+
+ from pathlib import Path
+
+ CLAUDE_MD_SECTION = '''
+ ## Build Journal
+
+ After completing significant work (features, debugging sessions, deployments, 2+ hour focused sessions), write a build journal entry.
+
+ **Location:** `buildlog/YYYY-MM-DD-{slug}.md`
+ **Template:** `buildlog/_TEMPLATE.md`
+
+ ### Required Sections
+ 1. **The Goal** - What we built and why
+ 2. **What We Built** - Architecture diagram, components table
+ 3. **The Journey** - Chronological INCLUDING mistakes, wrong turns, actual error messages
+ 4. **Test Results** - Actual commands run, actual outputs received
+ 5. **Code Samples** - Key snippets with context (not full files)
+ 6. **AI Experience Reflection** - Meta-commentary on the collaboration
+ 7. **Improvements** - Actionable learnings: architectural, workflow, tool usage, domain knowledge
+
+ The **Improvements** section is critical - capture concrete insights like "Should have defined the API contract before implementing the client" not vague observations like "Should have planned better."
+
+ **Quality bar:** Publishable as a $500+ Envato Tuts+/Manning tutorial.
+
+ After significant work, ask: "Should I write a build journal entry for this?"
+ '''
+
+ def main():
+     claude_md = Path("CLAUDE.md")
+
+     if not claude_md.exists():
+         print("No CLAUDE.md found, skipping update")
+         return
+
+     content = claude_md.read_text(encoding="utf-8")
+
+     if "## Build Journal" in content:
+         print("Build Journal section already exists in CLAUDE.md")
+         return
+
+     # Append to end of file
+     with open(claude_md, "a", encoding="utf-8") as f:
+         f.write("\n" + CLAUDE_MD_SECTION)
+
+     print("Added Build Journal section to CLAUDE.md")
+
+ if __name__ == "__main__":
+     main()
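
The script is deliberately idempotent: it appends the Build Journal section only when the "## Build Journal" heading is absent, so re-running the Copier task is safe. An illustrative check, assuming it runs from a generated project whose root contains both post_gen.py and a CLAUDE.md:

    from pathlib import Path

    import post_gen  # the script above, importable from the project root (assumption)

    Path("CLAUDE.md").write_text("# My project\n", encoding="utf-8")
    post_gen.main()  # appends the Build Journal section
    post_gen.main()  # no-op: "Build Journal section already exists in CLAUDE.md"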