buildlog 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- buildlog/__init__.py +3 -0
- buildlog/cli.py +437 -0
- buildlog/core/__init__.py +25 -0
- buildlog/core/operations.py +392 -0
- buildlog/distill.py +374 -0
- buildlog/embeddings.py +392 -0
- buildlog/mcp/__init__.py +15 -0
- buildlog/mcp/server.py +29 -0
- buildlog/mcp/tools.py +97 -0
- buildlog/render/__init__.py +41 -0
- buildlog/render/base.py +23 -0
- buildlog/render/claude_md.py +106 -0
- buildlog/render/settings_json.py +96 -0
- buildlog/skills.py +630 -0
- buildlog/stats.py +469 -0
- buildlog-0.1.0.data/data/share/buildlog/copier.yml +35 -0
- buildlog-0.1.0.data/data/share/buildlog/post_gen.py +51 -0
- buildlog-0.1.0.data/data/share/buildlog/template/buildlog/.gitkeep +0 -0
- buildlog-0.1.0.data/data/share/buildlog/template/buildlog/2026-01-01-example.md +269 -0
- buildlog-0.1.0.data/data/share/buildlog/template/buildlog/BUILDLOG_SYSTEM.md +114 -0
- buildlog-0.1.0.data/data/share/buildlog/template/buildlog/_TEMPLATE.md +162 -0
- buildlog-0.1.0.data/data/share/buildlog/template/buildlog/assets/.gitkeep +0 -0
- buildlog-0.1.0.dist-info/METADATA +664 -0
- buildlog-0.1.0.dist-info/RECORD +27 -0
- buildlog-0.1.0.dist-info/WHEEL +4 -0
- buildlog-0.1.0.dist-info/entry_points.txt +3 -0
- buildlog-0.1.0.dist-info/licenses/LICENSE +21 -0
buildlog/distill.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""Extract and aggregate patterns from buildlog entries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"CATEGORIES",
|
|
7
|
+
"DistillResult",
|
|
8
|
+
"distill_all",
|
|
9
|
+
"format_output",
|
|
10
|
+
"parse_improvements",
|
|
11
|
+
"parse_date_from_filename",
|
|
12
|
+
"iter_buildlog_entries",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
import json
import logging
import re
from collections.abc import Iterator
from dataclasses import dataclass, field
from datetime import UTC, date, datetime
from pathlib import Path
from typing import Final, Literal, TypedDict

# Module-level logger; the application configures handlers and levels.
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# Valid improvement categories (lowercase for matching).
# Order here fixes the key order of the aggregated `patterns` dict.
CATEGORIES: Final[tuple[str, ...]] = (
    "architectural",
    "workflow",
    "tool_usage",
    "domain_knowledge",
)
|
|
33
|
+
|
|
34
|
+
# Map from markdown heading to normalized category name.
# Accepts both spaced ("tool usage") and underscored ("tool_usage") headings;
# keys are compared lowercase (see parse_improvements).
CATEGORY_MAP: Final[dict[str, str]] = {
    "architectural": "architectural",
    "workflow": "workflow",
    "tool usage": "tool_usage",
    "tool_usage": "tool_usage",
    "domain knowledge": "domain_knowledge",
    "domain_knowledge": "domain_knowledge",
}
|
|
43
|
+
|
|
44
|
+
# File matching pattern for buildlog entries (YYYY-MM-DD-slug.md filenames;
# the "20??" prefix restricts matches to 21st-century dates).
BUILDLOG_GLOB_PATTERN: Final[str] = "20??-??-??-*.md"
|
|
46
|
+
|
|
47
|
+
# Type definitions.
# Accepted values for format_output's `fmt` parameter.
OutputFormat = Literal["json", "yaml"]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class PatternDict(TypedDict):
    """Type for a single pattern dictionary."""

    # The improvement insight text taken from a bullet point.
    insight: str
    # Path (as a string) of the buildlog file the insight came from.
    source: str
    # Entry date in YYYY-MM-DD form, parsed from the filename.
    date: str
    # Entry title used as context for the insight; empty when absent.
    context: str
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class StatisticsDict(TypedDict):
    """Type for statistics dictionary."""

    # Total number of patterns across all categories.
    total_patterns: int
    # Pattern count per category name.
    by_category: dict[str, int]
    # Entry (not pattern) count per YYYY-MM month key, sorted by month.
    by_month: dict[str, int]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class DistillResultDict(TypedDict):
    """Type for full distill result dictionary (serialized DistillResult)."""

    # UTC extraction timestamp in ISO-8601 form with a "Z" suffix.
    extracted_at: str
    # Number of buildlog entries that were successfully read.
    entry_count: int
    # Patterns grouped by category name.
    patterns: dict[str, list[PatternDict]]
    # Aggregate counts derived from the patterns.
    statistics: StatisticsDict
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class DistillResult:
    """Aggregated patterns from all buildlog entries.

    Attributes mirror :class:`DistillResultDict`; ``to_dict`` produces the
    serializable form consumed by ``format_output``.
    """

    extracted_at: str
    entry_count: int
    patterns: dict[str, list[PatternDict]] = field(default_factory=dict)
    statistics: StatisticsDict = field(default_factory=dict)

    def to_dict(self) -> DistillResultDict:
        """Convert to dictionary for JSON/YAML serialization."""
        return dict(
            extracted_at=self.extracted_at,
            entry_count=self.entry_count,
            patterns=self.patterns,
            statistics=self.statistics,
        )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _is_valid_insight(insight: str) -> bool:
|
|
97
|
+
"""Filter predicate for valid insights (not placeholders)."""
|
|
98
|
+
if not insight:
|
|
99
|
+
return False
|
|
100
|
+
if insight.startswith("[") and insight.endswith("]"):
|
|
101
|
+
return False
|
|
102
|
+
if insight.startswith("e.g.,"):
|
|
103
|
+
return False
|
|
104
|
+
return True
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def extract_title_and_context(content: str) -> str:
    """Extract a context description from the entry's H1 title.

    Looks for a "# Build Journal: <title>" heading; returns the trimmed
    title, or "" when missing or still the "[TITLE]" template placeholder.
    """
    heading = re.search(r"^#\s+Build Journal:\s*(.+)$", content, re.MULTILINE)
    if heading is None:
        return ""
    title = heading.group(1).strip()
    return title if title and title != "[TITLE]" else ""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _parse_bullet_content(category_content: str) -> list[str]:
|
|
118
|
+
"""Parse bullet points from category content, handling multi-line bullets.
|
|
119
|
+
|
|
120
|
+
A bullet can span multiple lines if continuation lines are indented.
|
|
121
|
+
Example:
|
|
122
|
+
- This is a long insight that
|
|
123
|
+
continues on the next line
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
List of complete bullet point texts.
|
|
127
|
+
"""
|
|
128
|
+
bullets: list[str] = []
|
|
129
|
+
current_bullet: list[str] = []
|
|
130
|
+
|
|
131
|
+
for line in category_content.split("\n"):
|
|
132
|
+
# New bullet point starts with optional whitespace, dash, space, then content
|
|
133
|
+
bullet_match = re.match(r"^\s*-\s+(.+)$", line)
|
|
134
|
+
if bullet_match:
|
|
135
|
+
# Save previous bullet if exists
|
|
136
|
+
if current_bullet:
|
|
137
|
+
bullets.append(" ".join(current_bullet))
|
|
138
|
+
current_bullet = [bullet_match.group(1).strip()]
|
|
139
|
+
elif current_bullet and line.strip():
|
|
140
|
+
# Continuation line: non-empty, not a new bullet
|
|
141
|
+
# Must be indented (starts with whitespace) to be a continuation
|
|
142
|
+
if line.startswith((" ", "\t")):
|
|
143
|
+
current_bullet.append(line.strip())
|
|
144
|
+
# Otherwise it's unrelated content, ignore it
|
|
145
|
+
|
|
146
|
+
# Don't forget the last bullet
|
|
147
|
+
if current_bullet:
|
|
148
|
+
bullets.append(" ".join(current_bullet))
|
|
149
|
+
|
|
150
|
+
return bullets
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def parse_improvements(content: str) -> dict[str, list[str]]:
    """Extract the "## Improvements" section from buildlog markdown.

    Args:
        content: The full markdown content of a buildlog entry.

    Returns:
        Dictionary mapping each category in CATEGORIES to its list of
        improvement insights (empty lists when absent).
    """
    collected: dict[str, list[str]] = {name: [] for name in CATEGORIES}

    # Grab everything from the "## Improvements" heading up to the next
    # H1/H2 heading (H3+ stays inside the section) or end of file.
    section = re.search(
        r"^##\s+Improvements\s*\n(.*?)(?=^#{1,2}\s|\Z)",
        content,
        re.MULTILINE | re.DOTALL,
    )
    if section is None:
        return collected

    # H3 category headings only; the (?!#) lookahead excludes H4 and deeper.
    heading_re = re.compile(
        r"^###(?!#)\s+([^\n]+)\s*\n(.*?)(?=^###(?!#)|\Z)", re.MULTILINE | re.DOTALL
    )

    for block in heading_re.finditer(section.group(1)):
        key = CATEGORY_MAP.get(block.group(1).strip().lower())
        if not key:
            # Unrecognized heading — skip it.
            continue
        bullets = _parse_bullet_content(block.group(2))
        collected[key] = [item for item in bullets if _is_valid_insight(item)]

    return collected
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def parse_date_from_filename(filename: str) -> str | None:
    """Extract the date prefix from a YYYY-MM-DD-slug.md buildlog filename.

    Returns the YYYY-MM-DD string, or None when the name doesn't start
    with a dash-terminated date.
    """
    prefix = re.match(r"^(\d{4}-\d{2}-\d{2})-", filename)
    if prefix is None:
        return None
    return prefix.group(1)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _extract_month_key(date_str: str) -> str:
|
|
201
|
+
"""Extract YYYY-MM month key from YYYY-MM-DD date string."""
|
|
202
|
+
return date_str[:7]
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def iter_buildlog_entries(
    buildlog_dir: Path, since: date | None = None
) -> Iterator[tuple[Path, str]]:
    """Iterate over buildlog entries in filename order, optionally date-filtered.

    Args:
        buildlog_dir: Path to the buildlog directory.
        since: If provided, only yield entries from this date onward.

    Yields:
        Tuples of (file_path, date_string) for each matching entry.
    """
    for path in sorted(buildlog_dir.glob(BUILDLOG_GLOB_PATTERN)):
        stamp = parse_date_from_filename(path.name)
        if stamp is None:
            continue

        # Validate the date even when no `since` filter is active, so that
        # files like 2024-99-99-x.md are skipped with a warning.
        try:
            parsed = date.fromisoformat(stamp)
        except ValueError:
            logger.warning("Invalid date in filename: %s", path.name)
            continue

        if since is not None and parsed < since:
            continue

        yield path, stamp
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _create_patterns_for_entry(
|
|
236
|
+
improvements: dict[str, list[str]],
|
|
237
|
+
source: str,
|
|
238
|
+
date_str: str,
|
|
239
|
+
context: str,
|
|
240
|
+
) -> dict[str, list[PatternDict]]:
|
|
241
|
+
"""Create pattern dicts from improvements - pure function."""
|
|
242
|
+
return {
|
|
243
|
+
category: [
|
|
244
|
+
PatternDict(
|
|
245
|
+
insight=insight,
|
|
246
|
+
source=source,
|
|
247
|
+
date=date_str,
|
|
248
|
+
context=context,
|
|
249
|
+
)
|
|
250
|
+
for insight in insights
|
|
251
|
+
]
|
|
252
|
+
for category, insights in improvements.items()
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _merge_patterns(
|
|
257
|
+
target: dict[str, list[PatternDict]],
|
|
258
|
+
source: dict[str, list[PatternDict]],
|
|
259
|
+
) -> None:
|
|
260
|
+
"""Merge source patterns into target (mutates target)."""
|
|
261
|
+
for category, patterns in source.items():
|
|
262
|
+
if category in target:
|
|
263
|
+
target[category].extend(patterns)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _apply_category_filter(
|
|
267
|
+
patterns: dict[str, list[PatternDict]],
|
|
268
|
+
category: str | None,
|
|
269
|
+
) -> dict[str, list[PatternDict]]:
|
|
270
|
+
"""Filter patterns to single category if specified."""
|
|
271
|
+
if category is None:
|
|
272
|
+
return patterns
|
|
273
|
+
return {category: patterns.get(category, [])}
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _compute_statistics(
|
|
277
|
+
patterns: dict[str, list[PatternDict]],
|
|
278
|
+
by_month: dict[str, int],
|
|
279
|
+
) -> StatisticsDict:
|
|
280
|
+
"""Compute statistics from aggregated patterns."""
|
|
281
|
+
by_category = {cat: len(items) for cat, items in patterns.items()}
|
|
282
|
+
return {
|
|
283
|
+
"total_patterns": sum(by_category.values()),
|
|
284
|
+
"by_category": by_category,
|
|
285
|
+
"by_month": dict(sorted(by_month.items())),
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def distill_all(
    buildlog_dir: Path,
    since: date | None = None,
    category_filter: str | None = None,
) -> DistillResult:
    """Parse every buildlog entry and aggregate its improvement patterns.

    Args:
        buildlog_dir: Path to the buildlog directory.
        since: If provided, only include entries from this date onward.
        category_filter: If provided, only include patterns from this category.

    Returns:
        DistillResult with aggregated patterns and statistics.
    """
    aggregated: dict[str, list[PatternDict]] = {name: [] for name in CATEGORIES}
    month_counts: dict[str, int] = {}
    processed = 0

    for path, stamp in iter_buildlog_entries(buildlog_dir, since):
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            # Unreadable entries are skipped, not fatal.
            logger.warning("Failed to read %s: %s", path, exc)
            continue

        processed += 1
        title = extract_title_and_context(text)

        # Entries are counted per month even when they yield no patterns.
        month = _extract_month_key(stamp)
        month_counts[month] = month_counts.get(month, 0) + 1

        try:
            parsed = parse_improvements(text)
        except re.error as exc:
            logger.warning("Failed to parse improvements in %s: %s", path, exc)
            continue

        _merge_patterns(
            aggregated,
            _create_patterns_for_entry(parsed, str(path), stamp, title),
        )

    narrowed = _apply_category_filter(aggregated, category_filter)

    return DistillResult(
        extracted_at=datetime.now(UTC).isoformat().replace("+00:00", "Z"),
        entry_count=processed,
        patterns=narrowed,
        statistics=_compute_statistics(narrowed, month_counts),
    )
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def format_output(result: DistillResult, fmt: OutputFormat = "json") -> str:
    """Render the distill result as JSON or YAML text.

    Args:
        result: The DistillResult to format.
        fmt: Output format, either "json" or "yaml".

    Returns:
        Formatted string representation.

    Raises:
        ValueError: If format is not recognized.
        ImportError: If PyYAML is required but not installed.
    """
    payload = result.to_dict()

    if fmt == "yaml":
        # PyYAML is an optional dependency, imported only when needed.
        try:
            import yaml
        except ImportError as exc:
            raise ImportError(
                "PyYAML is required for YAML output. Install it with: pip install pyyaml"
            ) from exc
        return yaml.dump(
            payload, default_flow_style=False, allow_unicode=True, sort_keys=False
        )

    if fmt == "json":
        return json.dumps(payload, indent=2, ensure_ascii=False)

    # Unreachable under the Literal type, kept as a defensive guard.
    raise ValueError(f"Unknown format: {fmt}")
|